From c285bf5350f14b8d6a7486841074480dea446c74 Mon Sep 17 00:00:00 2001 From: Mike Chan Date: Thu, 22 Jan 2009 12:23:32 -0800 Subject: [PATCH 0001/1442] ANDROID: Add android config documentation to boot framework. Signed-off-by: Mike Chan --- Documentation/android.txt | 121 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 Documentation/android.txt diff --git a/Documentation/android.txt b/Documentation/android.txt new file mode 100644 index 000000000000..72a62afdf202 --- /dev/null +++ b/Documentation/android.txt @@ -0,0 +1,121 @@ + ============= + A N D R O I D + ============= + +Copyright (C) 2009 Google, Inc. +Written by Mike Chan + +CONTENTS: +--------- + +1. Android + 1.1 Required enabled config options + 1.2 Required disabled config options + 1.3 Recommended enabled config options +2. Contact + + +1. Android +========== + +Android (www.android.com) is an open source operating system for mobile devices. +This document describes configurations needed to run the Android framework on +top of the Linux kernel. + +To see a working defconfig look at msm_defconfig or goldfish_defconfig +which can be found at http://android.git.kernel.org in kernel/common.git +and kernel/msm.git + + +1.1 Required enabled config options +----------------------------------- +After building a standard defconfig, ensure that these options are enabled in +your .config or defconfig if they are not already. Based off the msm_defconfig. +You should keep the rest of the default options enabled in the defconfig +unless you know what you are doing. + +ANDROID_PARANOID_NETWORK +ASHMEM +CONFIG_FB_MODE_HELPERS +CONFIG_FONT_8x16 +CONFIG_FONT_8x8 +CONFIG_YAFFS_SHORT_NAMES_IN_RAM +DAB +EARLYSUSPEND +FB +FB_CFB_COPYAREA +FB_CFB_FILLRECT +FB_CFB_IMAGEBLIT +FB_DEFERRED_IO +FB_TILEBLITTING +HIGH_RES_TIMERS +INOTIFY +INOTIFY_USER +INPUT_EVDEV +INPUT_GPIO +INPUT_MISC +LEDS_CLASS +LEDS_GPIO +LOCK_KERNEL +LkOGGER +LOW_MEMORY_KILLER +MISC_DEVICES +NEW_LEDS +NO_HZ +POWER_SUPPLY +PREEMPT +RAMFS +RTC_CLASS +RTC_LIB +SWITCH +SWITCH_GPIO +TMPFS +UID_STAT +UID16 +USB_FUNCTION +USB_FUNCTION_ADB +USER_WAKELOCK +VIDEO_OUTPUT_CONTROL +WAKELOCK +YAFFS_AUTO_YAFFS2 +YAFFS_FS +YAFFS_YAFFS1 +YAFFS_YAFFS2 + + +1.2 Required disabled config options +------------------------------------ +CONFIG_YAFFS_DISABLE_LAZY_LOAD +DNOTIFY + + +1.3 Recommended enabled config options +------------------------------ +ANDROID_PMEM +ANDROID_RAM_CONSOLE +ANDROID_RAM_CONSOLE_ERROR_CORRECTION +SCHEDSTATS +DEBUG_PREEMPT +DEBUG_MUTEXES +DEBUG_SPINLOCK_SLEEP +DEBUG_INFO +FRAME_POINTER +CPU_FREQ +CPU_FREQ_TABLE +CPU_FREQ_DEFAULT_GOV_ONDEMAND +CPU_FREQ_GOV_ONDEMAND +CRC_CCITT +EMBEDDED +INPUT_TOUCHSCREEN +I2C +I2C_BOARDINFO +LOG_BUF_SHIFT=17 +SERIAL_CORE +SERIAL_CORE_CONSOLE + + +2. Contact +========== +website: http://android.git.kernel.org + +mailing-lists: android-kernel@googlegroups.com -- GitLab From 6f1936763451de78df64f2aecb9d340fa78ef9ac Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 15 Dec 2011 18:51:04 -0800 Subject: [PATCH 0002/1442] ANDROID: ashmem: Add shmem_set_file to mm/shmem.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NOT FOR STAGING This patch re-adds the original shmem_set_file to mm/shmem.c and converts ashmem.c back to using it. Change-Id: Ie604c9f8f4d0ee6bc2aae1a96d261c8373a1a2dc CC: Brian Swetland CC: Colin Cross CC: Arve Hjønnevåg CC: Dima Zavin CC: Robert Love CC: Greg KH Signed-off-by: John Stultz --- drivers/staging/android/ashmem.c | 20 ++++++-------------- include/linux/mm.h | 1 + mm/shmem.c | 13 +++++++++---- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index ca9a53c03f0f..32ab11a42b1f 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -409,22 +409,14 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) } get_file(asma->file); - /* - * XXX - Reworked to use shmem_zero_setup() instead of - * shmem_set_file while we're in staging. -jstultz - */ - if (vma->vm_flags & VM_SHARED) { - ret = shmem_zero_setup(vma); - if (ret) { - fput(asma->file); - goto out; - } + if (vma->vm_flags & VM_SHARED) + shmem_set_file(vma, asma->file); + else { + if (vma->vm_file) + fput(vma->vm_file); + vma->vm_file = asma->file; } - if (vma->vm_file) - fput(vma->vm_file); - vma->vm_file = asma->file; - out: mutex_unlock(&ashmem_mutex); return ret; diff --git a/include/linux/mm.h b/include/linux/mm.h index a92c8d73aeaf..44a8f6a5e1e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1138,6 +1138,7 @@ extern void pagefault_out_of_memory(void); extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); +void shmem_set_file(struct vm_area_struct *vma, struct file *file); int shmem_zero_setup(struct vm_area_struct *); #ifdef CONFIG_SHMEM bool shmem_mapping(struct address_space *mapping); diff --git a/mm/shmem.c b/mm/shmem.c index 9d32e1cb9f38..e9c2b6e0ed93 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4052,6 +4052,14 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags } EXPORT_SYMBOL_GPL(shmem_file_setup); +void shmem_set_file(struct vm_area_struct *vma, struct file *file) +{ + if (vma->vm_file) + fput(vma->vm_file); + vma->vm_file = file; + vma->vm_ops = &shmem_vm_ops; +} + /** * shmem_zero_setup - setup a shared anonymous mapping * @vma: the vma to be mmapped is prepared by do_mmap_pgoff @@ -4071,10 +4079,7 @@ int shmem_zero_setup(struct vm_area_struct *vma) if (IS_ERR(file)) return PTR_ERR(file); - if (vma->vm_file) - fput(vma->vm_file); - vma->vm_file = file; - vma->vm_ops = &shmem_vm_ops; + shmem_set_file(vma, file); if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) && ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < -- GitLab From b3505d203681c6d97be40d9130ccdf0bbf65a432 Mon Sep 17 00:00:00 2001 From: Tobias Lindskog Date: Mon, 9 Feb 2015 08:10:39 +0100 Subject: [PATCH 0003/1442] ANDROID: Shrink ashmem directly through shmem_fallocate When ashmem_shrink is called from direct reclaim on a user thread, a call to do_fallocate will check for permissions against the security policy of that user thread. It can thus fail by chance if called on a thread that isn't permitted to modify the relevant ashmem areas. Because we know that we have a shmem file underneath, call the shmem implementation of fallocate directly instead of going through the user-space interface for fallocate. FIX=DMS06243560 Area: Kernel/Linux Kernel Bug: 21951515 Change-Id: Ie98fff18a2bdeb535cd24d4fbdd13677e12681a7 Signed-off-by: Jeff Vander Stoep --- drivers/staging/android/ashmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 32ab11a42b1f..3a52b29efab9 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -453,9 +453,9 @@ ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) loff_t start = range->pgstart * PAGE_SIZE; loff_t end = (range->pgend + 1) * PAGE_SIZE; - vfs_fallocate(range->asma->file, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - start, end - start); + range->asma->file->f_op->fallocate(range->asma->file, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, end - start); range->purged = ASHMEM_WAS_PURGED; lru_del(range); -- GitLab From 56eaecc8ecf398c7af3fa252eff6164102b0ace1 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 7 Dec 2016 14:54:38 +0100 Subject: [PATCH 0004/1442] hotplug: Make register and unregister notifier API symmetric commit 777c6e0daebb3fcefbbd6f620410a946b07ef6d0 upstream. Yu Zhao has noticed that __unregister_cpu_notifier only unregisters its notifiers when HOTPLUG_CPU=y while the registration might succeed even when HOTPLUG_CPU=n if MODULE is enabled. This means that e.g. zswap might keep a stale notifier on the list on the manual clean up during the pool tear down and thus corrupt the list. Resulting in the following [ 144.964346] BUG: unable to handle kernel paging request at ffff880658a2be78 [ 144.971337] IP: [] raw_notifier_chain_register+0x1b/0x40 [ 145.122628] Call Trace: [ 145.125086] [] __register_cpu_notifier+0x18/0x20 [ 145.131350] [] zswap_pool_create+0x273/0x400 [ 145.137268] [] __zswap_param_set+0x1fc/0x300 [ 145.143188] [] ? trace_hardirqs_on+0xd/0x10 [ 145.149018] [] ? kernel_param_lock+0x28/0x30 [ 145.154940] [] ? __might_fault+0x4f/0xa0 [ 145.160511] [] zswap_compressor_param_set+0x17/0x20 [ 145.167035] [] param_attr_store+0x5c/0xb0 [ 145.172694] [] module_attr_store+0x1d/0x30 [ 145.178443] [] sysfs_kf_write+0x4f/0x70 [ 145.183925] [] kernfs_fop_write+0x149/0x180 [ 145.189761] [] __vfs_write+0x18/0x40 [ 145.194982] [] vfs_write+0xb2/0x1a0 [ 145.200122] [] SyS_write+0x52/0xa0 [ 145.205177] [] entry_SYSCALL_64_fastpath+0x12/0x17 This can be even triggered manually by changing /sys/module/zswap/parameters/compressor multiple times. Fix this issue by making unregister APIs symmetric to the register so there are no surprises. Fixes: 47e627bc8c9a ("[PATCH] hotplug: Allow modules to use the cpu hotplug notifiers even if !CONFIG_HOTPLUG_CPU") Reported-and-tested-by: Yu Zhao Signed-off-by: Michal Hocko Cc: linux-mm@kvack.org Cc: Andrew Morton Cc: Dan Streetman Link: http://lkml.kernel.org/r/20161207135438.4310-1-mhocko@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 15 ++++----------- kernel/cpu.c | 2 +- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index b886dc17f2f3..e571128ad99a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -93,22 +93,16 @@ extern bool cpuhp_tasks_frozen; { .notifier_call = fn, .priority = pri }; \ __register_cpu_notifier(&fn##_nb); \ } -#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); extern int __register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); extern void __unregister_cpu_notifier(struct notifier_block *nb); -#else -#ifndef MODULE -extern int register_cpu_notifier(struct notifier_block *nb); -extern int __register_cpu_notifier(struct notifier_block *nb); -#else +#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) + static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; @@ -118,7 +112,6 @@ static inline int __register_cpu_notifier(struct notifier_block *nb) { return 0; } -#endif static inline void unregister_cpu_notifier(struct notifier_block *nb) { diff --git a/kernel/cpu.c b/kernel/cpu.c index 29de1a9352c0..217fd2e7f435 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -659,7 +659,6 @@ void __init cpuhp_threads_init(void) kthread_unpark(this_cpu_read(cpuhp_state.thread)); } -#ifdef CONFIG_HOTPLUG_CPU EXPORT_SYMBOL(register_cpu_notifier); EXPORT_SYMBOL(__register_cpu_notifier); void unregister_cpu_notifier(struct notifier_block *nb) @@ -676,6 +675,7 @@ void __unregister_cpu_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(__unregister_cpu_notifier); +#ifdef CONFIG_HOTPLUG_CPU /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id -- GitLab From 3bac322e18c3c5f3fc1d77ab0db1fca2e3b7cb03 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Mon, 12 Dec 2016 14:32:44 -0800 Subject: [PATCH 0005/1442] btrfs: limit async_work allocation and worker func duration commit 2939e1a86f758b55cdba73e29397dd3d94df13bc upstream. Problem statement: unprivileged user who has read-write access to more than one btrfs subvolume may easily consume all kernel memory (eventually triggering oom-killer). Reproducer (./mkrmdir below essentially loops over mkdir/rmdir): [root@kteam1 ~]# cat prep.sh DEV=/dev/sdb mkfs.btrfs -f $DEV mount $DEV /mnt for i in `seq 1 16` do mkdir /mnt/$i btrfs subvolume create /mnt/SV_$i ID=`btrfs subvolume list /mnt |grep "SV_$i$" |cut -d ' ' -f 2` mount -t btrfs -o subvolid=$ID $DEV /mnt/$i chmod a+rwx /mnt/$i done [root@kteam1 ~]# sh prep.sh [maxim@kteam1 ~]$ for i in `seq 1 16`; do ./mkrmdir /mnt/$i 2000 2000 & done [root@kteam1 ~]# for i in `seq 1 4`; do grep "kmalloc-128" /proc/slabinfo | grep -v dma; sleep 60; done kmalloc-128 10144 10144 128 32 1 : tunables 0 0 0 : slabdata 317 317 0 kmalloc-128 9992352 9992352 128 32 1 : tunables 0 0 0 : slabdata 312261 312261 0 kmalloc-128 24226752 24226752 128 32 1 : tunables 0 0 0 : slabdata 757086 757086 0 kmalloc-128 42754240 42754240 128 32 1 : tunables 0 0 0 : slabdata 1336070 1336070 0 The huge numbers above come from insane number of async_work-s allocated and queued by btrfs_wq_run_delayed_node. The problem is caused by btrfs_wq_run_delayed_node() queuing more and more works if the number of delayed items is above BTRFS_DELAYED_BACKGROUND. The worker func (btrfs_async_run_delayed_root) processes at least BTRFS_DELAYED_BATCH items (if they are present in the list). So, the machinery works as expected while the list is almost empty. As soon as it is getting bigger, worker func starts to process more than one item at a time, it takes longer, and the chances to have async_works queued more than needed is getting higher. The problem above is worsened by another flaw of delayed-inode implementation: if async_work was queued in a throttling branch (number of items >= BTRFS_DELAYED_WRITEBACK), corresponding worker func won't quit until the number of items < BTRFS_DELAYED_BACKGROUND / 2. So, it is possible that the func occupies CPU infinitely (up to 30sec in my experiments): while the func is trying to drain the list, the user activity may add more and more items to the list. The patch fixes both problems in straightforward way: refuse queuing too many works in btrfs_wq_run_delayed_node and bail out of worker func if at least BTRFS_DELAYED_WRITEBACK items are processed. Changed in v2: remove support of thresh == NO_THRESHOLD. Signed-off-by: Maxim Patlasov Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/async-thread.c | 14 ++++++++++++++ fs/btrfs/async-thread.h | 1 + fs/btrfs/delayed-inode.c | 6 ++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index e0f071f6b5a7..63d197724519 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -86,6 +86,20 @@ btrfs_work_owner(struct btrfs_work *work) return work->wq->fs_info; } +bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq) +{ + /* + * We could compare wq->normal->pending with num_online_cpus() + * to support "thresh == NO_THRESHOLD" case, but it requires + * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's + * postpone it until someone needs the support of that case. + */ + if (wq->normal->thresh == NO_THRESHOLD) + return false; + + return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2; +} + BTRFS_WORK_HELPER(worker_helper); BTRFS_WORK_HELPER(delalloc_helper); BTRFS_WORK_HELPER(flush_delalloc_helper); diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 8e52484cd461..1f9597355c9d 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -84,4 +84,5 @@ void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max); void btrfs_set_work_high_priority(struct btrfs_work *work); struct btrfs_fs_info *btrfs_work_owner(struct btrfs_work *work); struct btrfs_fs_info *btrfs_workqueue_owner(struct __btrfs_workqueue *wq); +bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq); #endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0fcf5f25d524..4d8f8a8c9c90 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1353,7 +1353,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work) total_done++; btrfs_release_prepared_delayed_node(delayed_node); - if (async_work->nr == 0 || total_done < async_work->nr) + if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) || + total_done < async_work->nr) goto again; free_path: @@ -1369,7 +1370,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, { struct btrfs_async_delayed_work *async_work; - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) + if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND || + btrfs_workqueue_normal_congested(fs_info->delayed_workers)) return 0; async_work = kmalloc(sizeof(*async_work), GFP_NOFS); -- GitLab From 7d470f04e36c6a23838e9bb384535b7c61fe372c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 2 Sep 2016 12:35:34 -0700 Subject: [PATCH 0006/1442] Btrfs: fix BUG_ON in btrfs_mark_buffer_dirty commit ef85b25e982b5bba1530b936e283ef129f02ab9d upstream. This can only happen with CONFIG_BTRFS_FS_CHECK_INTEGRITY=y. Commit 1ba98d0 ("Btrfs: detect corruption when non-root leaf has zero item") assumes that a leaf is its root when leaf->bytenr == btrfs_root_bytenr(root), however, we should not use btrfs_root_bytenr(root) since it's mainly got updated during committing transaction. So the check can fail when doing COW on this leaf while it is a root. This changes to use "if (leaf == btrfs_root_node(root))" instead, just like how we check whether leaf is a root in __btrfs_cow_block(). Fixes: 1ba98d086fe3 (Btrfs: detect corruption when non-root leaf has zero item) Reported-by: Jeff Mahoney Signed-off-by: Liu Bo Reviewed-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3a57f99d96aa..c4e673a94426 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -572,13 +572,17 @@ static noinline int check_leaf(struct btrfs_root *root, * open_ctree() some roots has not yet been set up. */ if (!IS_ERR_OR_NULL(check_root)) { + struct extent_buffer *eb; + + eb = btrfs_root_node(check_root); /* if leaf is the root, then it's fine */ - if (leaf->start != - btrfs_root_bytenr(&check_root->root_item)) { + if (leaf != eb) { CORRUPT("non-root leaf's nritems is 0", - leaf, root, 0); + leaf, check_root, 0); + free_extent_buffer(eb); return -EIO; } + free_extent_buffer(eb); } return 0; } -- GitLab From 664b053c53638f9a7b9f129d65182268fb504bf0 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 28 Oct 2016 10:48:26 +0800 Subject: [PATCH 0007/1442] Btrfs: fix deadlock caused by fsync when logging directory entries commit ec125cfb7ae2157af3dd45dd8abe823e3e233eec upstream. While logging new directory entries, at tree-log.c:log_new_dir_dentries(), after we call btrfs_search_forward() we get a leaf with a read lock on it, and without unlocking that leaf we can end up calling btrfs_iget() to get an inode pointer. The later (btrfs_iget()) can end up doing a read-only search on the same tree again, if the inode is not in memory already, which ends up causing a deadlock if some other task in the meanwhile started a write search on the tree and is attempting to write lock the same leaf that btrfs_search_forward() locked while holding write locks on upper levels of the tree blocking the read search from btrfs_iget(). In this scenario we get a deadlock. So fix this by releasing the search path before calling btrfs_iget() at tree-log.c:log_new_dir_dentries(). Example trace of such deadlock: [ 4077.478852] kworker/u24:10 D ffff88107fc90640 0 14431 2 0x00000000 [ 4077.486752] Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs] [ 4077.494346] ffff880ffa56bad0 0000000000000046 0000000000009000 ffff880ffa56bfd8 [ 4077.502629] ffff880ffa56bfd8 ffff881016ce21c0 ffffffffa06ecb26 ffff88101a5d6138 [ 4077.510915] ffff880ebb5173b0 ffff880ffa56baf8 ffff880ebb517410 ffff881016ce21c0 [ 4077.519202] Call Trace: [ 4077.528752] [] ? btrfs_tree_lock+0xdd/0x2f0 [btrfs] [ 4077.536049] [] ? wake_up_atomic_t+0x30/0x30 [ 4077.542574] [] ? btrfs_search_slot+0x79f/0xb10 [btrfs] [ 4077.550171] [] ? btrfs_lookup_file_extent+0x33/0x40 [btrfs] [ 4077.558252] [] ? __btrfs_drop_extents+0x13b/0xdf0 [btrfs] [ 4077.566140] [] ? add_delayed_data_ref+0xe2/0x150 [btrfs] [ 4077.573928] [] ? btrfs_add_delayed_data_ref+0x149/0x1d0 [btrfs] [ 4077.582399] [] ? __set_extent_bit+0x4c0/0x5c0 [btrfs] [ 4077.589896] [] ? insert_reserved_file_extent.constprop.75+0xa4/0x320 [btrfs] [ 4077.599632] [] ? start_transaction+0x8d/0x470 [btrfs] [ 4077.607134] [] ? btrfs_finish_ordered_io+0x2e7/0x600 [btrfs] [ 4077.615329] [] ? process_one_work+0x142/0x3d0 [ 4077.622043] [] ? worker_thread+0x109/0x3b0 [ 4077.628459] [] ? manage_workers.isra.26+0x270/0x270 [ 4077.635759] [] ? kthread+0xaf/0xc0 [ 4077.641404] [] ? kthread_create_on_node+0x110/0x110 [ 4077.648696] [] ? ret_from_fork+0x58/0x90 [ 4077.654926] [] ? kthread_create_on_node+0x110/0x110 [ 4078.358087] kworker/u24:15 D ffff88107fcd0640 0 14436 2 0x00000000 [ 4078.365981] Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs] [ 4078.373574] ffff880ffa57fad0 0000000000000046 0000000000009000 ffff880ffa57ffd8 [ 4078.381864] ffff880ffa57ffd8 ffff88103004d0a0 ffffffffa06ecb26 ffff88101a5d6138 [ 4078.390163] ffff880fbeffc298 ffff880ffa57faf8 ffff880fbeffc2f8 ffff88103004d0a0 [ 4078.398466] Call Trace: [ 4078.408019] [] ? btrfs_tree_lock+0xdd/0x2f0 [btrfs] [ 4078.415322] [] ? wake_up_atomic_t+0x30/0x30 [ 4078.421844] [] ? btrfs_search_slot+0x79f/0xb10 [btrfs] [ 4078.429438] [] ? btrfs_lookup_file_extent+0x33/0x40 [btrfs] [ 4078.437518] [] ? __btrfs_drop_extents+0x13b/0xdf0 [btrfs] [ 4078.445404] [] ? add_delayed_data_ref+0xe2/0x150 [btrfs] [ 4078.453194] [] ? btrfs_add_delayed_data_ref+0x149/0x1d0 [btrfs] [ 4078.461663] [] ? __set_extent_bit+0x4c0/0x5c0 [btrfs] [ 4078.469161] [] ? insert_reserved_file_extent.constprop.75+0xa4/0x320 [btrfs] [ 4078.478893] [] ? start_transaction+0x8d/0x470 [btrfs] [ 4078.486388] [] ? btrfs_finish_ordered_io+0x2e7/0x600 [btrfs] [ 4078.494561] [] ? process_one_work+0x142/0x3d0 [ 4078.501278] [] ? pwq_activate_delayed_work+0x27/0x40 [ 4078.508673] [] ? worker_thread+0x109/0x3b0 [ 4078.515098] [] ? manage_workers.isra.26+0x270/0x270 [ 4078.522396] [] ? kthread+0xaf/0xc0 [ 4078.528032] [] ? kthread_create_on_node+0x110/0x110 [ 4078.535325] [] ? ret_from_fork+0x58/0x90 [ 4078.541552] [] ? kthread_create_on_node+0x110/0x110 [ 4079.355824] user-space-program D ffff88107fd30640 0 32020 1 0x00000000 [ 4079.363716] ffff880eae8eba10 0000000000000086 0000000000009000 ffff880eae8ebfd8 [ 4079.372003] ffff880eae8ebfd8 ffff881016c162c0 ffffffffa06ecb26 ffff88101a5d6138 [ 4079.380294] ffff880fbed4b4c8 ffff880eae8eba38 ffff880fbed4b528 ffff881016c162c0 [ 4079.388586] Call Trace: [ 4079.398134] [] ? btrfs_tree_lock+0x85/0x2f0 [btrfs] [ 4079.405431] [] ? wake_up_atomic_t+0x30/0x30 [ 4079.411955] [] ? btrfs_lock_root_node+0x2b/0x40 [btrfs] [ 4079.419644] [] ? btrfs_search_slot+0xa03/0xb10 [btrfs] [ 4079.427237] [] ? btrfs_buffer_uptodate+0x52/0x70 [btrfs] [ 4079.435041] [] ? generic_bin_search.constprop.38+0x80/0x190 [btrfs] [ 4079.443897] [] ? btrfs_insert_empty_items+0x74/0xd0 [btrfs] [ 4079.451975] [] ? copy_items+0x128/0x850 [btrfs] [ 4079.458890] [] ? btrfs_log_inode+0x629/0xbf3 [btrfs] [ 4079.466292] [] ? btrfs_log_inode_parent+0xc61/0xf30 [btrfs] [ 4079.474373] [] ? btrfs_log_dentry_safe+0x59/0x80 [btrfs] [ 4079.482161] [] ? btrfs_sync_file+0x20d/0x330 [btrfs] [ 4079.489558] [] ? do_fsync+0x4c/0x80 [ 4079.495300] [] ? SyS_fdatasync+0xa/0x10 [ 4079.501422] [] ? system_call_fastpath+0x16/0x1b [ 4079.508334] user-space-program D ffff88107fc30640 0 32021 1 0x00000004 [ 4079.516226] ffff880eae8efbf8 0000000000000086 0000000000009000 ffff880eae8effd8 [ 4079.524513] ffff880eae8effd8 ffff881030279610 ffffffffa06ecb26 ffff88101a5d6138 [ 4079.532802] ffff880ebb671d88 ffff880eae8efc20 ffff880ebb671de8 ffff881030279610 [ 4079.541092] Call Trace: [ 4079.550642] [] ? btrfs_tree_lock+0x85/0x2f0 [btrfs] [ 4079.557941] [] ? wake_up_atomic_t+0x30/0x30 [ 4079.564463] [] ? btrfs_search_slot+0x79f/0xb10 [btrfs] [ 4079.572058] [] ? btrfs_truncate_inode_items+0x168/0xb90 [btrfs] [ 4079.580526] [] ? join_transaction.isra.15+0x1e/0x3a0 [btrfs] [ 4079.588701] [] ? start_transaction+0x8d/0x470 [btrfs] [ 4079.596196] [] ? block_rsv_add_bytes+0x16/0x50 [btrfs] [ 4079.603789] [] ? btrfs_truncate+0xe9/0x2e0 [btrfs] [ 4079.610994] [] ? btrfs_setattr+0x30b/0x410 [btrfs] [ 4079.618197] [] ? notify_change+0x1dc/0x680 [ 4079.624625] [] ? aa_path_perm+0xd4/0x160 [ 4079.630854] [] ? do_truncate+0x5b/0x90 [ 4079.636889] [] ? do_sys_ftruncate.constprop.15+0x10a/0x160 [ 4079.644869] [] ? SyS_fcntl+0x5b/0x570 [ 4079.650805] [] ? system_call_fastpath+0x16/0x1b [ 4080.410607] user-space-program D ffff88107fc70640 0 32028 12639 0x00000004 [ 4080.418489] ffff880eaeccbbe0 0000000000000086 0000000000009000 ffff880eaeccbfd8 [ 4080.426778] ffff880eaeccbfd8 ffff880f317ef1e0 ffffffffa06ecb26 ffff88101a5d6138 [ 4080.435067] ffff880ef7e93928 ffff880f317ef1e0 ffff880eaeccbc08 ffff880f317ef1e0 [ 4080.443353] Call Trace: [ 4080.452920] [] ? btrfs_tree_read_lock+0xdd/0x190 [btrfs] [ 4080.460703] [] ? wake_up_atomic_t+0x30/0x30 [ 4080.467225] [] ? btrfs_read_lock_root_node+0x2b/0x40 [btrfs] [ 4080.475400] [] ? btrfs_search_slot+0x801/0xb10 [btrfs] [ 4080.482994] [] ? btrfs_clean_one_deleted_snapshot+0xe0/0xe0 [btrfs] [ 4080.491857] [] ? btrfs_lookup_inode+0x26/0x90 [btrfs] [ 4080.499353] [] ? kmem_cache_alloc+0xaf/0xc0 [ 4080.505879] [] ? btrfs_iget+0xd5/0x5d0 [btrfs] [ 4080.512696] [] ? btrfs_get_token_64+0x104/0x120 [btrfs] [ 4080.520387] [] ? btrfs_log_inode_parent+0xbdf/0xf30 [btrfs] [ 4080.528469] [] ? btrfs_log_dentry_safe+0x59/0x80 [btrfs] [ 4080.536258] [] ? btrfs_sync_file+0x20d/0x330 [btrfs] [ 4080.543657] [] ? do_fsync+0x4c/0x80 [ 4080.549399] [] ? SyS_fdatasync+0xa/0x10 [ 4080.555534] [] ? system_call_fastpath+0x16/0x1b Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana Fixes: 2f2ff0ee5e43 (Btrfs: fix metadata inconsistencies after directory fsync) Signed-off-by: Filipe Manana [Modified changelog for clarity and correctness] Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3d33c4e41e5f..23dc153cf60b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5205,6 +5205,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, if (di_key.type == BTRFS_ROOT_ITEM_KEY) continue; + btrfs_release_path(path); di_inode = btrfs_iget(root->fs_info->sb, &di_key, root, NULL); if (IS_ERR(di_inode)) { @@ -5214,13 +5215,12 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, if (btrfs_inode_in_log(di_inode, trans->transid)) { iput(di_inode); - continue; + break; } ctx->log_new_dentries = false; if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) log_mode = LOG_INODE_ALL; - btrfs_release_path(path); ret = btrfs_log_inode(trans, root, di_inode, log_mode, 0, LLONG_MAX, ctx); if (!ret && -- GitLab From 26dc52465f0df557d154ab59f8567431d4876671 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 7 Oct 2016 17:30:47 +0800 Subject: [PATCH 0008/1442] Btrfs: fix tree search logic when replaying directory entry deletes commit 2a7bf53f577e49c43de4ffa7776056de26db65d9 upstream. If a log tree has a layout like the following: leaf N: ... item 240 key (282 DIR_LOG_ITEM 0) itemoff 8189 itemsize 8 dir log end 1275809046 leaf N + 1: item 0 key (282 DIR_LOG_ITEM 3936149215) itemoff 16275 itemsize 8 dir log end 18446744073709551615 ... When we pass the value 1275809046 + 1 as the parameter start_ret to the function tree-log.c:find_dir_range() (done by replay_dir_deletes()), we end up with path->slots[0] having the value 239 (points to the last item of leaf N, item 240). Because the dir log item in that position has an offset value smaller than *start_ret (1275809046 + 1) we need to move on to the next leaf, however the logic for that is wrong since it compares the current slot to the number of items in the leaf, which is smaller and therefore we don't lookup for the next leaf but instead we set the slot to point to an item that does not exist, at slot 240, and we later operate on that slot which has unexpected content or in the worst case can result in an invalid memory access (accessing beyond the last page of leaf N's extent buffer). So fix the logic that checks when we need to lookup at the next leaf by first incrementing the slot and only after to check if that slot is beyond the last item of the current leaf. Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana Fixes: e02119d5a7b4 (Btrfs: Add a write ahead tree log to optimize synchronous operations) Signed-off-by: Filipe Manana [Modified changelog for clarity and correctness] Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 23dc153cf60b..b89004513c09 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1940,12 +1940,11 @@ static noinline int find_dir_range(struct btrfs_root *root, next: /* check the next slot in the tree to see if it is a valid item */ nritems = btrfs_header_nritems(path->nodes[0]); + path->slots[0]++; if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) goto out; - } else { - path->slots[0]++; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); -- GitLab From 01f285fe1d885dadfd7439205c700340ec69ab13 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2016 11:23:31 +0000 Subject: [PATCH 0009/1442] Btrfs: fix relocation incorrectly dropping data references commit 054570a1dc94de20e7a612cddcc5a97db9c37b6f upstream. During relocation of a data block group we create a relocation tree for each fs/subvol tree by making a snapshot of each tree using btrfs_copy_root() and the tree's commit root, and then setting the last snapshot field for the fs/subvol tree's root to the value of the current transaction id minus 1. However this can lead to relocation later dropping references that it did not create if we have qgroups enabled, leaving the filesystem in an inconsistent state that keeps aborting transactions. Lets consider the following example to explain the problem, which requires qgroups to be enabled. We are relocating data block group Y, we have a subvolume with id 258 that has a root at level 1, that subvolume is used to store directory entries for snapshots and we are currently at transaction 3404. When committing transaction 3404, we have a pending snapshot and therefore we call btrfs_run_delayed_items() at transaction.c:create_pending_snapshot() in order to create its dentry at subvolume 258. This results in COWing leaf A from root 258 in order to add the dentry. Note that leaf A also contains file extent items referring to extents from some other block group X (we are currently relocating block group Y). Later on, still at create_pending_snapshot() we call qgroup_account_snapshot(), which switches the commit root for root 258 when it calls switch_commit_roots(), so now the COWed version of leaf A, lets call it leaf A', is accessible from the commit root of tree 258. At the end of qgroup_account_snapshot(), we call record_root_in_trans() with 258 as its argument, which results in btrfs_init_reloc_root() being called, which in turn calls relocation.c:create_reloc_root() in order to create a relocation tree associated to root 258, which results in assigning the value of 3403 (which is the current transaction id minus 1 = 3404 - 1) to the last_snapshot field of root 258. When creating the relocation tree root at ctree.c:btrfs_copy_root() we add a shared reference for leaf A', corresponding to the relocation tree's root, when we call btrfs_inc_ref() against the COWed root (a copy of the commit root from tree 258), which is at level 1. So at this point leaf A' has 2 references, one normal reference corresponding to root 258 and one shared reference corresponding to the root of the relocation tree. Transaction 3404 finishes its commit and transaction 3405 is started by relocation when calling merge_reloc_root() for the relocation tree associated to root 258. In the meanwhile leaf A' is COWed again, in response to some filesystem operation, when we are still at transaction 3405. However when we COW leaf A', at ctree.c:update_ref_for_cow(), we call btrfs_block_can_be_shared() in order to figure out if other trees refer to the leaf and if any such trees exists, add a full back reference to leaf A' - but btrfs_block_can_be_shared() incorrectly returns false because the following condition is false: btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item) which evaluates to 3404 <= 3403. So after leaf A' is COWed, it stays with only one reference, corresponding to the shared reference we created when we called btrfs_copy_root() to create the relocation tree's root and btrfs_inc_ref() ends up not being called for leaf A' nor we end up setting the flag BTRFS_BLOCK_FLAG_FULL_BACKREF in leaf A'. This results in not adding shared references for the extents from block group X that leaf A' refers to with its file extent items. Later, after merging the relocation root we do a call to to btrfs_drop_snapshot() in order to delete the relocation tree. This ends up calling do_walk_down() when path->slots[1] points to leaf A', which results in calling btrfs_lookup_extent_info() to get the number of references for leaf A', which is 1 at this time (only the shared reference exists) and this value is stored at wc->refs[0]. After this walk_up_proc() is called when wc->level is 0 and path->nodes[0] corresponds to leaf A'. Because the current level is 0 and wc->refs[0] is 1, it does call btrfs_dec_ref() against leaf A', which results in removing the single references that the extents from block group X have which are associated to root 258 - the expectation was to have each of these extents with 2 references - one reference for root 258 and one shared reference related to the root of the relocation tree, and so we would drop only the shared reference (because leaf A' was supposed to have the flag BTRFS_BLOCK_FLAG_FULL_BACKREF set). This leaves the filesystem in an inconsistent state as we now have file extent items in a subvolume tree that point to extents from block group X without references in the extent tree. So later on when we try to decrement the references for these extents, for example due to a file unlink operation, truncate operation or overwriting ranges of a file, we fail because the expected references do not exist in the extent tree. This leads to warnings and transaction aborts like the following: [ 588.965795] ------------[ cut here ]------------ [ 588.965815] WARNING: CPU: 2 PID: 2479 at fs/btrfs/extent-tree.c:1625 lookup_inline_extent_backref+0x432/0x5b0 [btrfs] [ 588.965816] Modules linked in: af_packet iscsi_ibft iscsi_boot_sysfs xfs libcrc32c ppdev acpi_cpufreq button tpm_tis e1000 i2c_piix4 pcspkr parport_pc parport tpm qemu_fw_cfg joydev btrfs xor raid6_pq sr_mod cdrom ata_generic virtio_scsi ata_piix virtio_pci bochs_drm virtio_ring drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio ttm serio_raw drm floppy sg [ 588.965831] CPU: 2 PID: 2479 Comm: kworker/u8:7 Not tainted 4.7.3-3-default-fdm+ #1 [ 588.965832] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [ 588.965844] Workqueue: btrfs-extent-refs btrfs_extent_refs_helper [btrfs] [ 588.965845] 0000000000000000 ffff8802263bfa28 ffffffff813af542 0000000000000000 [ 588.965847] 0000000000000000 ffff8802263bfa68 ffffffff81081e8b 0000065900000000 [ 588.965848] ffff8801db2af000 000000012bbe2000 0000000000000000 ffff880215703b48 [ 588.965849] Call Trace: [ 588.965852] [] dump_stack+0x63/0x81 [ 588.965854] [] __warn+0xcb/0xf0 [ 588.965855] [] warn_slowpath_null+0x1d/0x20 [ 588.965863] [] lookup_inline_extent_backref+0x432/0x5b0 [btrfs] [ 588.965865] [] ? trace_clock_local+0x10/0x30 [ 588.965867] [] ? rb_reserve_next_event+0x6f/0x460 [ 588.965875] [] insert_inline_extent_backref+0x55/0xd0 [btrfs] [ 588.965882] [] __btrfs_inc_extent_ref.isra.55+0x8f/0x240 [btrfs] [ 588.965890] [] __btrfs_run_delayed_refs+0x74a/0x1260 [btrfs] [ 588.965892] [] ? cpuacct_charge+0x86/0xa0 [ 588.965900] [] btrfs_run_delayed_refs+0x9f/0x2c0 [btrfs] [ 588.965908] [] delayed_ref_async_start+0x94/0xb0 [btrfs] [ 588.965918] [] btrfs_scrubparity_helper+0xca/0x350 [btrfs] [ 588.965928] [] btrfs_extent_refs_helper+0xe/0x10 [btrfs] [ 588.965930] [] process_one_work+0x1f3/0x4e0 [ 588.965931] [] worker_thread+0x48/0x4e0 [ 588.965932] [] ? process_one_work+0x4e0/0x4e0 [ 588.965934] [] kthread+0xc9/0xe0 [ 588.965936] [] ret_from_fork+0x1f/0x40 [ 588.965937] [] ? kthread_worker_fn+0x170/0x170 [ 588.965938] ---[ end trace 34e5232c933a1749 ]--- [ 588.966187] ------------[ cut here ]------------ [ 588.966196] WARNING: CPU: 2 PID: 2479 at fs/btrfs/extent-tree.c:2966 btrfs_run_delayed_refs+0x28c/0x2c0 [btrfs] [ 588.966196] BTRFS: Transaction aborted (error -5) [ 588.966197] Modules linked in: af_packet iscsi_ibft iscsi_boot_sysfs xfs libcrc32c ppdev acpi_cpufreq button tpm_tis e1000 i2c_piix4 pcspkr parport_pc parport tpm qemu_fw_cfg joydev btrfs xor raid6_pq sr_mod cdrom ata_generic virtio_scsi ata_piix virtio_pci bochs_drm virtio_ring drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio ttm serio_raw drm floppy sg [ 588.966206] CPU: 2 PID: 2479 Comm: kworker/u8:7 Tainted: G W 4.7.3-3-default-fdm+ #1 [ 588.966207] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [ 588.966217] Workqueue: btrfs-extent-refs btrfs_extent_refs_helper [btrfs] [ 588.966217] 0000000000000000 ffff8802263bfc98 ffffffff813af542 ffff8802263bfce8 [ 588.966219] 0000000000000000 ffff8802263bfcd8 ffffffff81081e8b 00000b96345ee000 [ 588.966220] ffffffffa021ae1c ffff880215703b48 00000000000005fe ffff8802345ee000 [ 588.966221] Call Trace: [ 588.966223] [] dump_stack+0x63/0x81 [ 588.966224] [] __warn+0xcb/0xf0 [ 588.966225] [] warn_slowpath_fmt+0x4f/0x60 [ 588.966233] [] btrfs_run_delayed_refs+0x28c/0x2c0 [btrfs] [ 588.966241] [] delayed_ref_async_start+0x94/0xb0 [btrfs] [ 588.966250] [] btrfs_scrubparity_helper+0xca/0x350 [btrfs] [ 588.966259] [] btrfs_extent_refs_helper+0xe/0x10 [btrfs] [ 588.966260] [] process_one_work+0x1f3/0x4e0 [ 588.966261] [] worker_thread+0x48/0x4e0 [ 588.966263] [] ? process_one_work+0x4e0/0x4e0 [ 588.966264] [] kthread+0xc9/0xe0 [ 588.966265] [] ret_from_fork+0x1f/0x40 [ 588.966267] [] ? kthread_worker_fn+0x170/0x170 [ 588.966268] ---[ end trace 34e5232c933a174a ]--- [ 588.966269] BTRFS: error (device sda2) in btrfs_run_delayed_refs:2966: errno=-5 IO failure [ 588.966270] BTRFS info (device sda2): forced readonly This was happening often on openSUSE and SLE systems using btrfs as the root filesystem (with its default layout where multiple subvolumes are used) where balance happens in the background triggered by a cron job and snapshots are automatically created before/after package installations, upgrades and removals. The issue could be triggered simply by running the following loop on the first system boot post installation: while true; do zypper -n in nfs-kernel-server zypper -n rm nfs-kernel-server done (If we were fast enough and made that loop before the cron job triggered a balance operation and the balance finished) So fix by setting the last_snapshot field of the root to the value of the generation of its commit root. Like this btrfs_block_can_be_shared() behaves correctly for the case where the relocation root is created during a transaction commit and for the case where it's created before a transaction commit. Fixes: 6426c7ad697d (btrfs: qgroup: Fix qgroup accounting when creating snapshot) Signed-off-by: Filipe Manana Reviewed-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c4af0cdb783d..2cf5e142675e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1395,14 +1395,23 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, root_key.offset = objectid; if (root->root_key.objectid == objectid) { + u64 commit_root_gen; + /* called by btrfs_init_reloc_root */ ret = btrfs_copy_root(trans, root, root->commit_root, &eb, BTRFS_TREE_RELOC_OBJECTID); BUG_ON(ret); - last_snap = btrfs_root_last_snapshot(&root->root_item); - btrfs_set_root_last_snapshot(&root->root_item, - trans->transid - 1); + /* + * Set the last_snapshot field to the generation of the commit + * root - like this ctree.c:btrfs_block_can_be_shared() behaves + * correctly (returns true) when the relocation root is created + * either inside the critical section of a transaction commit + * (through transaction.c:qgroup_account_snapshot()) and when + * it's created before the transaction commit is started. + */ + commit_root_gen = btrfs_header_generation(root->commit_root); + btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen); } else { /* * called by btrfs_reloc_post_snapshot_hook. -- GitLab From c01ea880e88abedec7c4f02fcd9a110ce73c7ccb Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 1 Nov 2016 14:21:23 +0100 Subject: [PATCH 0010/1442] btrfs: store and load values of stripes_min/stripes_max in balance status item commit ed0df618b1b06d7431ee4d985317fc5419a5d559 upstream. The balance status item contains currently known filter values, but the stripes filter was unintentionally not among them. This would mean, that interrupted and automatically restarted balance does not apply the stripe filters. Fixes: dee32d0ac3719ef8d640efaf0884111df444730f Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0b8ce2b9f7d0..86245b884fce 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2210,6 +2210,8 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, cpu->target = le64_to_cpu(disk->target); cpu->flags = le64_to_cpu(disk->flags); cpu->limit = le64_to_cpu(disk->limit); + cpu->stripes_min = le32_to_cpu(disk->stripes_min); + cpu->stripes_max = le32_to_cpu(disk->stripes_max); } static inline void @@ -2228,6 +2230,8 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, disk->target = cpu_to_le64(cpu->target); disk->flags = cpu_to_le64(cpu->flags); disk->limit = cpu_to_le64(cpu->limit); + disk->stripes_min = cpu_to_le32(cpu->stripes_min); + disk->stripes_max = cpu_to_le32(cpu->stripes_max); } /* struct btrfs_super_block */ -- GitLab From a1e0e0476afb2471f9cee0a995206b053a8241f8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 23 Nov 2016 16:21:18 +0000 Subject: [PATCH 0011/1442] Btrfs: fix emptiness check for dirtied extent buffers at check_leaf() commit f177d73949bf758542ca15a1c1945bd2e802cc65 upstream. We can not simply use the owner field from an extent buffer's header to get the id of the respective tree when the extent buffer is from a relocation tree. When we create the root for a relocation tree we leave (on purpose) the owner field with the same value as the subvolume's tree root (we do this at ctree.c:btrfs_copy_root()). So we must ignore extent buffers from relocation trees, which have the BTRFS_HEADER_FLAG_RELOC flag set, because otherwise we will always consider the extent buffer as not being the root of the tree (the root of original subvolume tree is always different from the root of the respective relocation tree). This lead to assertion failures when running with the integrity checker enabled (CONFIG_BTRFS_FS_CHECK_INTEGRITY=y) such as the following: [ 643.393409] BTRFS critical (device sdg): corrupt leaf, non-root leaf's nritems is 0: block=38506496, root=260, slot=0 [ 643.397609] BTRFS info (device sdg): leaf 38506496 total ptrs 0 free space 3995 [ 643.407075] assertion failed: 0, file: fs/btrfs/disk-io.c, line: 4078 [ 643.408425] ------------[ cut here ]------------ [ 643.409112] kernel BUG at fs/btrfs/ctree.h:3419! [ 643.409773] invalid opcode: 0000 [#1] PREEMPT SMP [ 643.410447] Modules linked in: dm_flakey dm_mod crc32c_generic btrfs xor raid6_pq ppdev psmouse acpi_cpufreq parport_pc evdev parport tpm_tis tpm_tis_core pcspkr serio_raw i2c_piix4 sg tpm i2c_core button processor loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring scsi_mod virtio e1000 floppy [ 643.414356] CPU: 11 PID: 32726 Comm: btrfs Not tainted 4.8.0-rc8-btrfs-next-35+ #1 [ 643.414356] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [ 643.414356] task: ffff880145e95b00 task.stack: ffff88014826c000 [ 643.414356] RIP: 0010:[] [] assfail.constprop.41+0x1c/0x1e [btrfs] [ 643.414356] RSP: 0018:ffff88014826fa28 EFLAGS: 00010292 [ 643.414356] RAX: 0000000000000039 RBX: ffff88014e2d7c38 RCX: 0000000000000001 [ 643.414356] RDX: ffff88023f4d2f58 RSI: ffffffff81806c63 RDI: 00000000ffffffff [ 643.414356] RBP: ffff88014826fa28 R08: 0000000000000001 R09: 0000000000000000 [ 643.414356] R10: ffff88014826f918 R11: ffffffff82f3c5ed R12: ffff880172910000 [ 643.414356] R13: ffff880233992230 R14: ffff8801a68a3310 R15: fffffffffffffff8 [ 643.414356] FS: 00007f9ca305e8c0(0000) GS:ffff88023f4c0000(0000) knlGS:0000000000000000 [ 643.414356] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 643.414356] CR2: 00007f9ca3071000 CR3: 000000015d01b000 CR4: 00000000000006e0 [ 643.414356] Stack: [ 643.414356] ffff88014826fa50 ffffffffa02d655a 000000000000000a ffff88014e2d7c38 [ 643.414356] 0000000000000000 ffff88014826faa8 ffffffffa02b72f3 ffff88014826fab8 [ 643.414356] 00ffffffa03228e4 0000000000000000 0000000000000000 ffff8801bbd4e000 [ 643.414356] Call Trace: [ 643.414356] [] btrfs_mark_buffer_dirty+0xdf/0xe5 [btrfs] [ 643.414356] [] btrfs_copy_root+0x18a/0x1d1 [btrfs] [ 643.414356] [] create_reloc_root+0x72/0x1ba [btrfs] [ 643.414356] [] btrfs_init_reloc_root+0x7b/0xa7 [btrfs] [ 643.414356] [] record_root_in_trans+0xdf/0xed [btrfs] [ 643.414356] [] btrfs_record_root_in_trans+0x50/0x6a [btrfs] [ 643.414356] [] create_subvol+0x472/0x773 [btrfs] [ 643.414356] [] btrfs_mksubvol+0x3da/0x463 [btrfs] [ 643.414356] [] ? btrfs_mksubvol+0x3da/0x463 [btrfs] [ 643.414356] [] ? preempt_count_add+0x65/0x68 [ 643.414356] [] ? __mnt_want_write+0x62/0x77 [ 643.414356] [] btrfs_ioctl_snap_create_transid+0xce/0x187 [btrfs] [ 643.414356] [] btrfs_ioctl_snap_create+0x67/0x81 [btrfs] [ 643.414356] [] btrfs_ioctl+0x508/0x20dd [btrfs] [ 643.414356] [] ? __this_cpu_preempt_check+0x13/0x15 [ 643.414356] [] ? handle_mm_fault+0x976/0x9ab [ 643.414356] [] ? arch_local_irq_save+0x9/0xc [ 643.414356] [] vfs_ioctl+0x18/0x34 [ 643.414356] [] do_vfs_ioctl+0x581/0x600 [ 643.414356] [] ? entry_SYSCALL_64_fastpath+0x5/0xa8 [ 643.414356] [] ? trace_hardirqs_on_caller+0x17b/0x197 [ 643.414356] [] SyS_ioctl+0x57/0x79 [ 643.414356] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [ 643.414356] [] ? trace_hardirqs_off_caller+0x3f/0xaa [ 643.414356] Code: 89 83 88 00 00 00 31 c0 5b 41 5c 41 5d 5d c3 55 89 f1 48 c7 c2 98 bc 35 a0 48 89 fe 48 c7 c7 05 be 35 a0 48 89 e5 e8 13 46 dd e0 <0f> 0b 55 89 f1 48 c7 c2 9f d3 35 a0 48 89 fe 48 c7 c7 7a d5 35 [ 643.414356] RIP [] assfail.constprop.41+0x1c/0x1e [btrfs] [ 643.414356] RSP [ 643.468267] ---[ end trace 6a1b3fb1a9d7d6e3 ]--- This can be easily reproduced by running xfstests with the integrity checker enabled. Fixes: 1ba98d086fe3 (Btrfs: detect corruption when non-root leaf has zero item) Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c4e673a94426..1cd325765aaa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -559,7 +559,15 @@ static noinline int check_leaf(struct btrfs_root *root, u32 nritems = btrfs_header_nritems(leaf); int slot; - if (nritems == 0) { + /* + * Extent buffers from a relocation tree have a owner field that + * corresponds to the subvolume tree they are based on. So just from an + * extent buffer alone we can not find out what is the id of the + * corresponding subvolume tree, so we can not figure out if the extent + * buffer corresponds to the root of the relocation tree or not. So skip + * this check for relocation trees. + */ + if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { struct btrfs_root *check_root; key.objectid = btrfs_header_owner(leaf); @@ -587,6 +595,9 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } + if (nritems == 0) + return 0; + /* Check the 0 item */ if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)) { -- GitLab From 1a5ec7dd17a98547618ea7b0ef3bbeb3837a7c49 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 24 Nov 2016 02:09:04 +0000 Subject: [PATCH 0012/1442] Btrfs: fix qgroup rescan worker initialization commit 8d9eddad19467b008e0c881bc3133d7da94b7ec1 upstream. We were setting the qgroup_rescan_running flag to true only after the rescan worker started (which is a task run by a queue). So if a user space task starts a rescan and immediately after asks to wait for the rescan worker to finish, this second call might happen before the rescan worker task starts running, in which case the rescan wait ioctl returns immediatley, not waiting for the rescan worker to finish. This was making the fstest btrfs/022 fail very often. Fixes: d2c609b834d6 (btrfs: properly track when rescan worker is running) Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 11f4fffe503e..dfd99867ff4d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2335,10 +2335,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; - mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_rescan_running = true; - mutex_unlock(&fs_info->qgroup_rescan_lock); - path = btrfs_alloc_path(); if (!path) goto out; @@ -2449,6 +2445,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, sizeof(fs_info->qgroup_rescan_progress)); fs_info->qgroup_rescan_progress.objectid = progress_objectid; init_completion(&fs_info->qgroup_rescan_completion); + fs_info->qgroup_rescan_running = true; spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); -- GitLab From 142513d6dc7ca378d90537abcc341d65dfaf438e Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 1 Dec 2016 16:38:39 +0100 Subject: [PATCH 0013/1442] USB: serial: option: add support for Telit LE922A PIDs 0x1040, 0x1041 commit 5b09eff0c379002527ad72ea5ea38f25da8a8650 upstream. This patch adds support for PIDs 0x1040, 0x1041 of Telit LE922A. Since the interface positions are the same than the ones used for other Telit compositions, previous defined blacklists are used. Signed-off-by: Daniele Palmas Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9894e341c6ac..33ae20392398 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -268,6 +268,8 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_CC864_SINGLE 0x1006 #define TELIT_PRODUCT_DE910_DUAL 0x1010 #define TELIT_PRODUCT_UE910_V2 0x1012 +#define TELIT_PRODUCT_LE922_USBCFG1 0x1040 +#define TELIT_PRODUCT_LE922_USBCFG2 0x1041 #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 @@ -1210,6 +1212,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG1), + .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG2), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), -- GitLab From 5e7c90bd53c61f878eb18b02f388d54ba47eca83 Mon Sep 17 00:00:00 2001 From: Giuseppe Lippolis Date: Tue, 6 Dec 2016 21:24:19 +0100 Subject: [PATCH 0014/1442] USB: serial: option: add dlink dwm-158 commit d8a12b7117b42fd708f1e908498350232bdbd5ff upstream. Adding registration for 3G modem DWM-158 in usb-serial-option Signed-off-by: Giuseppe Lippolis Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 33ae20392398..7ce31a4c7e7f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1995,6 +1995,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ -- GitLab From c094cd32b0c714cc94608ca8c706261d50bc43e5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2016 16:55:01 +0100 Subject: [PATCH 0015/1442] USB: serial: kl5kusb105: fix open error path commit 6774d5f53271d5f60464f824748995b71da401ab upstream. Kill urbs and disable read before returning from open on failure to retrieve the line state. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/kl5kusb105.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index fc5d3a791e08..6f29bfadbe33 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -296,7 +296,7 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) rc = usb_serial_generic_open(tty, port); if (rc) { retval = rc; - goto exit; + goto err_free_cfg; } rc = usb_control_msg(port->serial->dev, @@ -315,17 +315,32 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) dev_dbg(&port->dev, "%s - enabled reading\n", __func__); rc = klsi_105_get_line_state(port, &line_state); - if (rc >= 0) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_state = line_state; - spin_unlock_irqrestore(&priv->lock, flags); - dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, line_state); - retval = 0; - } else + if (rc < 0) { retval = rc; + goto err_disable_read; + } + + spin_lock_irqsave(&priv->lock, flags); + priv->line_state = line_state; + spin_unlock_irqrestore(&priv->lock, flags); + dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, + line_state); + + return 0; -exit: +err_disable_read: + usb_control_msg(port->serial->dev, + usb_sndctrlpipe(port->serial->dev, 0), + KL5KUSB105A_SIO_CONFIGURE, + USB_TYPE_VENDOR | USB_DIR_OUT, + KL5KUSB105A_SIO_CONFIGURE_READ_OFF, + 0, /* index */ + NULL, 0, + KLSI_TIMEOUT); + usb_serial_generic_close(port); +err_free_cfg: kfree(cfg); + return retval; } -- GitLab From 20d7c1a68b5b9fc7fe94c543a9920d9d7234017c Mon Sep 17 00:00:00 2001 From: Nathaniel Quillin Date: Mon, 5 Dec 2016 06:53:00 -0800 Subject: [PATCH 0016/1442] USB: cdc-acm: add device id for GW Instek AFG-125 commit 301216044e4c27d5a7323c1fa766266fad00db5e upstream. Add device-id entry for GW Instek AFG-125, which has a byte swapped bInterfaceSubClass (0x20). Signed-off-by: Nathaniel Quillin Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index fada988512a1..c5ff13f22b24 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1719,6 +1719,7 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x20df, 0x0001), /* Simtec Electronics Entropy Key */ .driver_info = QUIRK_CONTROL_LINE_STATE, }, { USB_DEVICE(0x2184, 0x001c) }, /* GW Instek AFG-2225 */ + { USB_DEVICE(0x2184, 0x0036) }, /* GW Instek AFG-125 */ { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ }, /* Motorola H24 HSPA module: */ -- GitLab From 3666b6280351c88592f7d767052d948fc81aeb18 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 22 Sep 2016 11:01:01 +0300 Subject: [PATCH 0017/1442] usb: dwc3: gadget: set PCM1 field of isochronous-first TRBs commit 6b9018d4c1e5c958625be94a160a5984351d4632 upstream. In case of High-Speed, High-Bandwidth endpoints, we need to tell DWC3 that we have more than one packet per interval. We do that by setting PCM1 field of Isochronous-First TRB. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1dfa56a5f1c5..b3687e223e00 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -771,6 +771,9 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, unsigned length, unsigned chain, unsigned node) { struct dwc3_trb *trb; + struct dwc3 *dwc = dep->dwc; + struct usb_gadget *gadget = &dwc->gadget; + enum usb_device_speed speed = gadget->speed; dwc3_trace(trace_dwc3_gadget, "%s: req %p dma %08llx length %d%s", dep->name, req, (unsigned long long) dma, @@ -798,10 +801,16 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, break; case USB_ENDPOINT_XFER_ISOC: - if (!node) + if (!node) { trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS_FIRST; - else + + if (speed == USB_SPEED_HIGH) { + struct usb_ep *ep = &dep->endpoint; + trb->size |= DWC3_TRB_SIZE_PCM1(ep->mult - 1); + } + } else { trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS; + } /* always enable Interrupt on Missed ISOC */ trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI; -- GitLab From 32a35351b7ec93947efb394f252d9ad714a04f18 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 17 Nov 2016 11:14:14 +0200 Subject: [PATCH 0018/1442] usb: hub: Fix auto-remount of safely removed or ejected USB-3 devices commit 37be66767e3cae4fd16e064d8bb7f9f72bf5c045 upstream. USB-3 does not have any link state that will avoid negotiating a connection with a plugged-in cable but will signal the host when the cable is unplugged. For USB-3 we used to first set the link to Disabled, then to RxDdetect to be able to detect cable connects or disconnects. But in RxDetect the connected device is detected again and eventually enabled. Instead set the link into U3 and disable remote wakeups for the device. This is what Windows does, and what Alan Stern suggested. Cc: Alan Stern Acked-by: Alan Stern Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 101 +++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 65 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index cbb146736f57..0d81436c94bd 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -101,6 +101,8 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); +static void hub_usb3_port_prepare_disable(struct usb_hub *hub, + struct usb_port *port_dev); static inline char *portspeed(struct usb_hub *hub, int portstatus) { @@ -899,82 +901,28 @@ static int hub_set_port_link_state(struct usb_hub *hub, int port1, } /* - * If USB 3.0 ports are placed into the Disabled state, they will no longer - * detect any device connects or disconnects. This is generally not what the - * USB core wants, since it expects a disabled port to produce a port status - * change event when a new device connects. - * - * Instead, set the link state to Disabled, wait for the link to settle into - * that state, clear any change bits, and then put the port into the RxDetect - * state. + * USB-3 does not have a similar link state as USB-2 that will avoid negotiating + * a connection with a plugged-in cable but will signal the host when the cable + * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices */ -static int hub_usb3_port_disable(struct usb_hub *hub, int port1) -{ - int ret; - int total_time; - u16 portchange, portstatus; - - if (!hub_is_superspeed(hub->hdev)) - return -EINVAL; - - ret = hub_port_status(hub, port1, &portstatus, &portchange); - if (ret < 0) - return ret; - - /* - * USB controller Advanced Micro Devices, Inc. [AMD] FCH USB XHCI - * Controller [1022:7814] will have spurious result making the following - * usb 3.0 device hotplugging route to the 2.0 root hub and recognized - * as high-speed device if we set the usb 3.0 port link state to - * Disabled. Since it's already in USB_SS_PORT_LS_RX_DETECT state, we - * check the state here to avoid the bug. - */ - if ((portstatus & USB_PORT_STAT_LINK_STATE) == - USB_SS_PORT_LS_RX_DETECT) { - dev_dbg(&hub->ports[port1 - 1]->dev, - "Not disabling port; link state is RxDetect\n"); - return ret; - } - - ret = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_SS_DISABLED); - if (ret) - return ret; - - /* Wait for the link to enter the disabled state. */ - for (total_time = 0; ; total_time += HUB_DEBOUNCE_STEP) { - ret = hub_port_status(hub, port1, &portstatus, &portchange); - if (ret < 0) - return ret; - - if ((portstatus & USB_PORT_STAT_LINK_STATE) == - USB_SS_PORT_LS_SS_DISABLED) - break; - if (total_time >= HUB_DEBOUNCE_TIMEOUT) - break; - msleep(HUB_DEBOUNCE_STEP); - } - if (total_time >= HUB_DEBOUNCE_TIMEOUT) - dev_warn(&hub->ports[port1 - 1]->dev, - "Could not disable after %d ms\n", total_time); - - return hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_RX_DETECT); -} - static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *hdev = hub->hdev; int ret = 0; - if (port_dev->child && set_state) - usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); if (!hub->error) { - if (hub_is_superspeed(hub->hdev)) - ret = hub_usb3_port_disable(hub, port1); - else + if (hub_is_superspeed(hub->hdev)) { + hub_usb3_port_prepare_disable(hub, port_dev); + ret = hub_set_port_link_state(hub, port_dev->portnum, + USB_SS_PORT_LS_U3); + } else { ret = usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_ENABLE); + } } + if (port_dev->child && set_state) + usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); if (ret && ret != -ENODEV) dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); return ret; @@ -4140,6 +4088,26 @@ void usb_unlocked_enable_lpm(struct usb_device *udev) } EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); +/* usb3 devices use U3 for disabled, make sure remote wakeup is disabled */ +static void hub_usb3_port_prepare_disable(struct usb_hub *hub, + struct usb_port *port_dev) +{ + struct usb_device *udev = port_dev->child; + int ret; + + if (udev && udev->port_is_suspended && udev->do_remote_wakeup) { + ret = hub_set_port_link_state(hub, port_dev->portnum, + USB_SS_PORT_LS_U0); + if (!ret) { + msleep(USB_RESUME_TIMEOUT); + ret = usb_disable_remote_wakeup(udev); + } + if (ret) + dev_warn(&udev->dev, + "Port disable: can't disable remote wake\n"); + udev->do_remote_wakeup = 0; + } +} #else /* CONFIG_PM */ @@ -4147,6 +4115,9 @@ EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); #define hub_resume NULL #define hub_reset_resume NULL +static inline void hub_usb3_port_prepare_disable(struct usb_hub *hub, + struct usb_port *port_dev) { } + int usb_disable_lpm(struct usb_device *udev) { return 0; -- GitLab From eab169397ad687e69b3bde3547a1fd37e7f81194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 6 Dec 2016 00:39:33 +0100 Subject: [PATCH 0019/1442] usb: core: usbport: Use proper LED API to fix potential crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 89778ba335e302a450932ce5b703c1ee6216e949 upstream. Calling brightness_set manually isn't safe as some LED drivers don't implement this callback. The best idea is to just use a proper helper which will fallback to the brightness_set_blocking callback if needed. This fixes: [ 1461.761528] Unable to handle kernel NULL pointer dereference at virtual address 00000000 (...) [ 1462.117049] Backtrace: [ 1462.119521] [] (usbport_trig_port_store [ledtrig_usbport]) from [] (dev_attr_store+0x20/0x2c) [ 1462.129826] r7:dcabc7c0 r6:dee0ff80 r5:00000002 r4:bf228164 [ 1462.135511] [] (dev_attr_store) from [] (sysfs_kf_write+0x48/0x4c) [ 1462.143459] r5:00000002 r4:c023f738 [ 1462.147049] [] (sysfs_kf_write) from [] (kernfs_fop_write+0xf8/0x1f8) [ 1462.155258] r5:00000002 r4:df4a1000 [ 1462.158850] [] (kernfs_fop_write) from [] (__vfs_write+0x34/0x120) [ 1462.166800] r10:00000000 r9:dee0e000 r8:c000fc24 r7:00000002 r6:dee0ff80 r5:c01689c0 [ 1462.174660] r4:df727a80 [ 1462.177204] [] (__vfs_write) from [] (vfs_write+0xac/0x170) [ 1462.184543] r9:dee0e000 r8:c000fc24 r7:dee0ff80 r6:b6f092d0 r5:df727a80 r4:00000002 [ 1462.192319] [] (vfs_write) from [] (SyS_write+0x4c/0xa8) [ 1462.199396] r9:dee0e000 r8:c000fc24 r7:00000002 r6:b6f092d0 r5:df727a80 r4:df727a80 [ 1462.207174] [] (SyS_write) from [] (ret_fast_syscall+0x0/0x3c) [ 1462.214774] r7:00000004 r6:ffffffff r5:00000000 r4:00000000 [ 1462.220456] Code: bad PC value [ 1462.223560] ---[ end trace 676638a3a12c7a56 ]--- Reported-by: Ralph Sennhauser Signed-off-by: Rafał Miłecki Fixes: 0f247626cbb ("usb: core: Introduce a USB port LED trigger") Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/ledtrig-usbport.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/ledtrig-usbport.c b/drivers/usb/core/ledtrig-usbport.c index 3ed5162677ad..1713248ab15a 100644 --- a/drivers/usb/core/ledtrig-usbport.c +++ b/drivers/usb/core/ledtrig-usbport.c @@ -74,8 +74,7 @@ static void usbport_trig_update_count(struct usbport_trig_data *usbport_data) usbport_data->count = 0; usb_for_each_dev(usbport_data, usbport_trig_usb_dev_check); - led_cdev->brightness_set(led_cdev, - usbport_data->count ? LED_FULL : LED_OFF); + led_set_brightness(led_cdev, usbport_data->count ? LED_FULL : LED_OFF); } /*************************************** @@ -228,12 +227,12 @@ static int usbport_trig_notify(struct notifier_block *nb, unsigned long action, case USB_DEVICE_ADD: usbport_trig_add_usb_dev_ports(usb_dev, usbport_data); if (observed && usbport_data->count++ == 0) - led_cdev->brightness_set(led_cdev, LED_FULL); + led_set_brightness(led_cdev, LED_FULL); return NOTIFY_OK; case USB_DEVICE_REMOVE: usbport_trig_remove_usb_dev_ports(usbport_data, usb_dev); if (observed && --usbport_data->count == 0) - led_cdev->brightness_set(led_cdev, LED_OFF); + led_set_brightness(led_cdev, LED_OFF); return NOTIFY_OK; } -- GitLab From 5180169dae85a3bd5b081529ba149374a0b3ffed Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 8 Nov 2016 10:10:44 +0800 Subject: [PATCH 0020/1442] usb: gadget: f_uac2: fix error handling at afunc_bind commit f1d3861d63a5d79b8968a02eea1dcb01bb684e62 upstream. The current error handling flow uses incorrect goto label, fix it Fixes: d12a8727171c ("usb: gadget: function: Remove redundant usb_free_all_descriptors") Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index cd214ec8a601..969cfe741380 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -1067,13 +1067,13 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc); if (!agdev->out_ep) { dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); - goto err; + return ret; } agdev->in_ep = usb_ep_autoconfig(gadget, &fs_epin_desc); if (!agdev->in_ep) { dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); - goto err; + return ret; } uac2->p_prm.uac2 = uac2; @@ -1091,7 +1091,7 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL, NULL); if (ret) - goto err; + return ret; prm = &agdev->uac2.c_prm; prm->max_psize = hs_epout_desc.wMaxPacketSize; @@ -1106,19 +1106,19 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) prm->rbuf = kzalloc(prm->max_psize * USB_XFERS, GFP_KERNEL); if (!prm->rbuf) { prm->max_psize = 0; - goto err_free_descs; + goto err; } ret = alsa_uac2_init(agdev); if (ret) - goto err_free_descs; + goto err; return 0; -err_free_descs: - usb_free_all_descriptors(fn); err: kfree(agdev->uac2.p_prm.rbuf); kfree(agdev->uac2.c_prm.rbuf); +err_free_descs: + usb_free_all_descriptors(fn); return -EINVAL; } -- GitLab From e0aa5ec40d6effc891eb63740bb03f5f50cb539e Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 28 Sep 2016 10:38:11 +0300 Subject: [PATCH 0021/1442] usb: gadget: composite: correctly initialize ep->maxpacket commit e8f29bb719b47a234f33b0af62974d7a9521a52c upstream. usb_endpoint_maxp() returns wMaxPacketSize in its raw form. Without taking into consideration that it also contains other bits reserved for isochronous endpoints. This patch fixes one occasion where this is a problem by making sure that we initialize ep->maxpacket only with lower 10 bits of the value returned by usb_endpoint_maxp(). Note that seperate patches will be necessary to audit all call sites of usb_endpoint_maxp() and make sure that usb_endpoint_maxp() only returns lower 10 bits of wMaxPacketSize. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 32176f779861..f6a7583ab6d1 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -197,7 +197,7 @@ int config_ep_by_speed(struct usb_gadget *g, ep_found: /* commit results */ - _ep->maxpacket = usb_endpoint_maxp(chosen_desc); + _ep->maxpacket = usb_endpoint_maxp(chosen_desc) & 0x7ff; _ep->desc = chosen_desc; _ep->comp_desc = NULL; _ep->maxburst = 0; -- GitLab From 420f170ce1ba999872f4be893807041b0d8db930 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 21 Oct 2016 16:49:07 -0400 Subject: [PATCH 0022/1442] USB: UHCI: report non-PME wakeup signalling for Intel hardware commit ccdb6be9ec6580ef69f68949ebe26e0fb58a6fb0 upstream. The UHCI controllers in Intel chipsets rely on a platform-specific non-PME mechanism for wakeup signalling. They can generate wakeup signals even though they don't support PME. We need to let the USB core know this so that it will enable runtime suspend for UHCI controllers. Signed-off-by: Alan Stern Signed-off-by: Bjorn Helgaas Acked-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/uhci-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c index 940304c33224..02260cfdedb1 100644 --- a/drivers/usb/host/uhci-pci.c +++ b/drivers/usb/host/uhci-pci.c @@ -129,6 +129,10 @@ static int uhci_pci_init(struct usb_hcd *hcd) if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_HP) uhci->wait_for_hp = 1; + /* Intel controllers use non-PME wakeup signalling */ + if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_INTEL) + device_set_run_wake(uhci_dev(uhci), 1); + /* Set up pointers to PCI-specific functions */ uhci->reset_hc = uhci_pci_reset_hc; uhci->check_and_reset_hc = uhci_pci_check_and_reset_hc; -- GitLab From 77bd73ce21fafe2ca80fe09b6e9dbebf93fd4cd6 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 1 Dec 2016 19:14:37 +0100 Subject: [PATCH 0023/1442] usbip: vudc: fix: Clear already_seen flag also for ep0 commit 3e448e13a662fb20145916636127995cbf37eb83 upstream. ep_list inside gadget structure doesn't contain ep0. It is stored separately in ep0 field. This causes an urb hang if gadget driver decides to delay setup handling. On host side this is visible as timeout error when setting configuration. This bug can be reproduced using for example any gadget with mass storage function. Fixes: abdb29574322 ("usbip: vudc: Add vudc_transfer") Signed-off-by: Krzysztof Opasiak Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_transfer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/usbip/vudc_transfer.c b/drivers/usb/usbip/vudc_transfer.c index aba6bd478045..bc0296d937d0 100644 --- a/drivers/usb/usbip/vudc_transfer.c +++ b/drivers/usb/usbip/vudc_transfer.c @@ -339,6 +339,8 @@ static void v_timer(unsigned long _vudc) total = timer->frame_limit; } + /* We have to clear ep0 flags separately as it's not on the list */ + udc->ep[0].already_seen = 0; list_for_each_entry(_ep, &udc->gadget.ep_list, ep_list) { ep = to_vep(_ep); ep->already_seen = 0; -- GitLab From 205d3de9637f253dfaeca8a7740af1e9d554c3c7 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 9 Dec 2016 15:15:57 +1100 Subject: [PATCH 0024/1442] ALSA: usb-audio: Add QuickCam Communicate Deluxe/S7500 to volume_control_quirks commit 82ffb6fc637150b279f49e174166d2aa3853eaf4 upstream. The Logitech QuickCam Communicate Deluxe/S7500 microphone fails with the following warning. [ 6.778995] usb 2-1.2.2.2: Warning! Unlikely big volume range (=3072), cval->res is probably wrong. [ 6.778996] usb 2-1.2.2.2: [5] FU [Mic Capture Volume] ch = 1, val = 4608/7680/1 Adding it to the list of devices in volume_control_quirks makes it work properly, fixing related typo. Signed-off-by: Con Kolivas Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 2f8c388ef84f..4703caea56b2 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -932,9 +932,10 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */ case USB_ID(0x046d, 0x08ca): /* Logitech Quickcam Fusion */ case USB_ID(0x046d, 0x0991): + case USB_ID(0x046d, 0x09a2): /* QuickCam Communicate Deluxe/S7500 */ /* Most audio usb devices lie about volume resolution. * Most Logitech webcams have res = 384. - * Proboly there is some logitech magic behind this number --fishor + * Probably there is some logitech magic behind this number --fishor */ if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, -- GitLab From fa2e770f88bcd50a34182792d06a1ff488cbac5c Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Mon, 28 Nov 2016 11:27:45 +0200 Subject: [PATCH 0025/1442] ALSA: hiface: Fix M2Tech hiFace driver sampling rate change commit 995c6a7fd9b9212abdf01160f6ce3193176be503 upstream. Sampling rate changes after first set one are not reflected to the hardware, while driver and ALSA think the rate has been changed. Fix the problem by properly stopping the interface at the beginning of prepare call, allowing new rate to be set to the hardware. This keeps the hardware in sync with the driver. Signed-off-by: Jussi Laako Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/hiface/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c index 2c44139b4041..33db205dd12b 100644 --- a/sound/usb/hiface/pcm.c +++ b/sound/usb/hiface/pcm.c @@ -445,6 +445,8 @@ static int hiface_pcm_prepare(struct snd_pcm_substream *alsa_sub) mutex_lock(&rt->stream_mutex); + hiface_pcm_stream_stop(rt); + sub->dma_off = 0; sub->period_off = 0; -- GitLab From 0f1047be4a9f8411c8ed9ba1a4bb63bfa6659e1c Mon Sep 17 00:00:00 2001 From: Sven Hahne Date: Fri, 25 Nov 2016 14:16:43 +0100 Subject: [PATCH 0026/1442] ALSA: hda/ca0132 - Add quirk for Alienware 15 R2 2016 commit b5337cfe067e96b8a98699da90c7dcd2bec21133 upstream. I'm using an Alienware 15 R2 and had to use the alienware quirks to get my headphone output working. I fixed it by adding, SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE) to the patch. Signed-off-by: Sven Hahne Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index ad06866d7c69..11b9b2f17a2e 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -780,6 +780,7 @@ static const struct hda_pintbl alienware_pincfgs[] = { static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE), SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE), + SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE), {} }; -- GitLab From 37b7c5db5a3065ce5d72ce7067d420273819f9a9 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 23 Nov 2016 16:05:37 +0800 Subject: [PATCH 0027/1442] ALSA: hda - ignore the assoc and seq when comparing pin configurations commit 64047d7f4912de1769d1bf0d34c6322494b13779 upstream. More and more pin configurations have been adding to the pin quirk table, lots of them are only different from assoc and seq, but they all apply to the same QUIRK_FIXUP, if we don't compare assoc and seq when matching pin configurations, it will greatly reduce the pin quirk table size. We have tested this change on a couple of Dell laptops, it worked well. Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_auto_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 7f57a145a47e..4ad29f8d7a4a 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -901,7 +901,7 @@ static bool pin_config_match(struct hda_codec *codec, for (; t_pins->nid; t_pins++) { if (t_pins->nid == nid) { found = 1; - if (t_pins->val == cfg) + if ((t_pins->val & 0xfffffff0) == (cfg & 0xfffffff0)) break; else if ((cfg & 0xf0000000) == 0x40000000 && (t_pins->val & 0xf0000000) == 0x40000000) break; -- GitLab From 0119d5d44034399478ae80349e810acb15aa5e1d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 23 Nov 2016 16:05:38 +0800 Subject: [PATCH 0028/1442] ALSA: hda - fix headset-mic problem on a Dell laptop commit 989dbe4a30728c047316ab87e5fa8b609951ce7c upstream. This group of new pins is not in the pin quirk table yet, adding them to the pin quirk table to fix the headset-mic problem. Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ea81c08ddc7a..3f75d1b83bf2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5917,6 +5917,9 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60180}, {0x14, 0x90170120}, {0x21, 0x02211030}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x1b, 0x01011020}, + {0x21, 0x02211010}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, -- GitLab From e029ef3a9c82ae05792173235f88d5e46db0f79c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Dec 2016 11:55:17 +0100 Subject: [PATCH 0029/1442] ALSA: hda - Gate the mic jack on HP Z1 Gen3 AiO commit f73cd43ac3b41c0f09a126387f302bbc0d9c726d upstream. HP Z1 Gen3 AiO with Conexant codec doesn't give an unsolicited event to the headset mic pin upon the jack plugging, it reports only to the headphone pin. It results in the missing mic switching. Let's fix up by simply gating the jack event. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index ed62748a6d55..c15c51bea26d 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -262,6 +262,7 @@ enum { CXT_FIXUP_CAP_MIX_AMP_5047, CXT_FIXUP_MUTE_LED_EAPD, CXT_FIXUP_HP_SPECTRE, + CXT_FIXUP_HP_GATE_MIC, }; /* for hda_fixup_thinkpad_acpi() */ @@ -633,6 +634,17 @@ static void cxt_fixup_cap_mix_amp_5047(struct hda_codec *codec, (1 << AC_AMPCAP_MUTE_SHIFT)); } +static void cxt_fixup_hp_gate_mic_jack(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* the mic pin (0x19) doesn't give an unsolicited event; + * probe the mic pin together with the headphone pin (0x16) + */ + if (action == HDA_FIXUP_ACT_PROBE) + snd_hda_jack_set_gating_jack(codec, 0x19, 0x16); +} + /* ThinkPad X200 & co with cxt5051 */ static const struct hda_pintbl cxt_pincfg_lenovo_x200[] = { { 0x16, 0x042140ff }, /* HP (seq# overridden) */ @@ -774,6 +786,10 @@ static const struct hda_fixup cxt_fixups[] = { { } } }, + [CXT_FIXUP_HP_GATE_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_hp_gate_mic_jack, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -824,6 +840,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), + SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), -- GitLab From 0ce4f00087b476db4baf97344fffee4d84a6e9d3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 6 Dec 2016 16:56:27 +0800 Subject: [PATCH 0030/1442] ALSA: hda: when comparing pin configurations, ignore assoc in addition to seq commit 5e0ad0d8747f3e4803a9c3d96d64dd7332506d3c upstream. Commit [64047d7f4912 ALSA: hda - ignore the assoc and seq when comparing pin configurations] intented to ignore both seq and assoc at pin comparing, but it only ignored seq. So that commit may still fail to match pins on some machines. Change the bitmask to also ignore assoc. v2: Use macro to do bit masking. Thanks to Hui Wang for the analysis. Fixes: 64047d7f4912 ("ALSA: hda - ignore the assoc and seq when comparing...") Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_auto_parser.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 4ad29f8d7a4a..a03cf68d0bcd 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -884,6 +884,8 @@ void snd_hda_apply_fixup(struct hda_codec *codec, int action) } EXPORT_SYMBOL_GPL(snd_hda_apply_fixup); +#define IGNORE_SEQ_ASSOC (~(AC_DEFCFG_SEQUENCE | AC_DEFCFG_DEF_ASSOC)) + static bool pin_config_match(struct hda_codec *codec, const struct hda_pintbl *pins) { @@ -901,7 +903,7 @@ static bool pin_config_match(struct hda_codec *codec, for (; t_pins->nid; t_pins++) { if (t_pins->nid == nid) { found = 1; - if ((t_pins->val & 0xfffffff0) == (cfg & 0xfffffff0)) + if ((t_pins->val & IGNORE_SEQ_ASSOC) == (cfg & IGNORE_SEQ_ASSOC)) break; else if ((cfg & 0xf0000000) == 0x40000000 && (t_pins->val & 0xf0000000) == 0x40000000) break; -- GitLab From 0de98eef9c115ce0a23995d06bba32691297fbf8 Mon Sep 17 00:00:00 2001 From: Richard Watts Date: Fri, 2 Dec 2016 23:14:38 +0200 Subject: [PATCH 0031/1442] clk: ti: omap36xx: Work around sprz319 advisory 2.1 commit 035cd485a47dda64f25ccf8a90b11a07d0b7aa7a upstream. The OMAP36xx DPLL5, driving EHCI USB, can be subject to a long-term frequency drift. The frequency drift magnitude depends on the VCO update rate, which is inversely proportional to the PLL divider. The kernel DPLL configuration code results in a high value for the divider, leading to a long term drift high enough to cause USB transmission errors. In the worst case the USB PHY's ULPI interface can stop responding, breaking USB operation completely. This manifests itself on the Beagleboard xM by the LAN9514 reporting 'Cannot enable port 2. Maybe the cable is bad?' in the kernel log. Errata sprz319 advisory 2.1 documents PLL values that minimize the drift. Use them automatically when DPLL5 is used for USB operation, which we detect based on the requested clock rate. The clock framework will still compute the PLL parameters and resulting rate as usual, but the PLL M and N values will then be overridden. This can result in the effective clock rate being slightly different than the rate cached by the clock framework, but won't cause any adverse effect to USB operation. Signed-off-by: Richard Watts [Upported from v3.2 to v4.9] Signed-off-by: Laurent Pinchart Tested-by: Ladislav Michl Signed-off-by: Stephen Boyd Cc: Adam Ford Signed-off-by: Greg Kroah-Hartman --- drivers/clk/ti/clk-3xxx.c | 20 ++++++------ drivers/clk/ti/clock.h | 9 ++++++ drivers/clk/ti/dpll.c | 19 ++++++++++- drivers/clk/ti/dpll3xxx.c | 67 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 11 deletions(-) diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index 8831e1a05367..11d8aa3ec186 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -22,13 +22,6 @@ #include "clock.h" -/* - * DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks - * that are sourced by DPLL5, and both of these require this clock - * to be at 120 MHz for proper operation. - */ -#define DPLL5_FREQ_FOR_USBHOST 120000000 - #define OMAP3430ES2_ST_DSS_IDLE_SHIFT 1 #define OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT 5 #define OMAP3430ES2_ST_SSI_IDLE_SHIFT 8 @@ -546,14 +539,21 @@ void __init omap3_clk_lock_dpll5(void) struct clk *dpll5_clk; struct clk *dpll5_m2_clk; + /* + * Errata sprz319f advisory 2.1 documents a USB host clock drift issue + * that can be worked around using specially crafted dpll5 settings + * with a dpll5_m2 divider set to 8. Set the dpll5 rate to 8x the USB + * host clock rate, its .set_rate handler() will detect that frequency + * and use the errata settings. + */ dpll5_clk = clk_get(NULL, "dpll5_ck"); - clk_set_rate(dpll5_clk, DPLL5_FREQ_FOR_USBHOST); + clk_set_rate(dpll5_clk, OMAP3_DPLL5_FREQ_FOR_USBHOST * 8); clk_prepare_enable(dpll5_clk); - /* Program dpll5_m2_clk divider for no division */ + /* Program dpll5_m2_clk divider */ dpll5_m2_clk = clk_get(NULL, "dpll5_m2_ck"); clk_prepare_enable(dpll5_m2_clk); - clk_set_rate(dpll5_m2_clk, DPLL5_FREQ_FOR_USBHOST); + clk_set_rate(dpll5_m2_clk, OMAP3_DPLL5_FREQ_FOR_USBHOST); clk_disable_unprepare(dpll5_m2_clk); clk_disable_unprepare(dpll5_clk); diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 90f3f472ae1c..13c37f48d9d6 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -257,11 +257,20 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long parent_rate); +/* + * OMAP3_DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks + * that are sourced by DPLL5, and both of these require this clock + * to be at 120 MHz for proper operation. + */ +#define OMAP3_DPLL5_FREQ_FOR_USBHOST 120000000 + unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate); int omap3_dpll4_set_rate(struct clk_hw *clk, unsigned long rate, unsigned long parent_rate); int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate, u8 index); +int omap3_dpll5_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); void omap3_clk_lock_dpll5(void); unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c index 9fc8754a6e61..4b9a419d8e14 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -114,6 +114,18 @@ static const struct clk_ops omap3_dpll_ck_ops = { .round_rate = &omap2_dpll_round_rate, }; +static const struct clk_ops omap3_dpll5_ck_ops = { + .enable = &omap3_noncore_dpll_enable, + .disable = &omap3_noncore_dpll_disable, + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap3_dpll_recalc, + .set_rate = &omap3_dpll5_set_rate, + .set_parent = &omap3_noncore_dpll_set_parent, + .set_rate_and_parent = &omap3_noncore_dpll_set_rate_and_parent, + .determine_rate = &omap3_noncore_dpll_determine_rate, + .round_rate = &omap2_dpll_round_rate, +}; + static const struct clk_ops omap3_dpll_per_ck_ops = { .enable = &omap3_noncore_dpll_enable, .disable = &omap3_noncore_dpll_disable, @@ -474,7 +486,12 @@ static void __init of_ti_omap3_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd); + if ((of_machine_is_compatible("ti,omap3630") || + of_machine_is_compatible("ti,omap36xx")) && + !strcmp(node->name, "dpll5_ck")) + of_ti_dpll_setup(node, &omap3_dpll5_ck_ops, &dd); + else + of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_dpll_clock, "ti,omap3-dpll-clock", of_ti_omap3_dpll_setup); diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c index 88f2ce81ba55..4cdd28a25584 100644 --- a/drivers/clk/ti/dpll3xxx.c +++ b/drivers/clk/ti/dpll3xxx.c @@ -838,3 +838,70 @@ int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, return omap3_noncore_dpll_set_rate_and_parent(hw, rate, parent_rate, index); } + +/* Apply DM3730 errata sprz319 advisory 2.1. */ +static bool omap3_dpll5_apply_errata(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct omap3_dpll5_settings { + unsigned int rate, m, n; + }; + + static const struct omap3_dpll5_settings precomputed[] = { + /* + * From DM3730 errata advisory 2.1, table 35 and 36. + * The N value is increased by 1 compared to the tables as the + * errata lists register values while last_rounded_field is the + * real divider value. + */ + { 12000000, 80, 0 + 1 }, + { 13000000, 443, 5 + 1 }, + { 19200000, 50, 0 + 1 }, + { 26000000, 443, 11 + 1 }, + { 38400000, 25, 0 + 1 } + }; + + const struct omap3_dpll5_settings *d; + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(precomputed); ++i) { + if (parent_rate == precomputed[i].rate) + break; + } + + if (i == ARRAY_SIZE(precomputed)) + return false; + + d = &precomputed[i]; + + /* Update the M, N and rounded rate values and program the DPLL. */ + dd = clk->dpll_data; + dd->last_rounded_m = d->m; + dd->last_rounded_n = d->n; + dd->last_rounded_rate = div_u64((u64)parent_rate * d->m, d->n); + omap3_noncore_dpll_program(clk, 0); + + return true; +} + +/** + * omap3_dpll5_set_rate - set rate for omap3 dpll5 + * @hw: clock to change + * @rate: target rate for clock + * @parent_rate: rate of the parent clock + * + * Set rate for the DPLL5 clock. Apply the sprz319 advisory 2.1 on OMAP36xx if + * the DPLL is used for USB host (detected through the requested rate). + */ +int omap3_dpll5_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + if (rate == OMAP3_DPLL5_FREQ_FOR_USBHOST * 8) { + if (omap3_dpll5_apply_errata(hw, parent_rate)) + return 0; + } + + return omap3_noncore_dpll_set_rate(hw, rate, parent_rate); +} -- GitLab From 21245b8635e8636e9e01df06f33cf08f369322b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 16 Nov 2016 22:06:51 -0600 Subject: [PATCH 0032/1442] exec: Ensure mm->user_ns contains the execed files commit f84df2a6f268de584a201e8911384a2d244876e3 upstream. When the user namespace support was merged the need to prevent ptrace from revealing the contents of an unreadable executable was overlooked. Correct this oversight by ensuring that the executed file or files are in mm->user_ns, by adjusting mm->user_ns. Use the new function privileged_wrt_inode_uidgid to see if the executable is a member of the user namespace, and as such if having CAP_SYS_PTRACE in the user namespace should allow tracing the executable. If not update mm->user_ns to the parent user namespace until an appropriate parent is found. Reported-by: Jann Horn Fixes: 9e4a36ece652 ("userns: Fail exec for suid and sgid binaries with ids outside our user namespace.") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 19 +++++++++++++++++-- include/linux/capability.h | 1 + kernel/capability.c | 16 ++++++++++++++-- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 4e497b9ee71e..669c09bfdcad 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1275,8 +1275,22 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { - if (inode_permission(file_inode(file), MAY_READ) < 0) + struct inode *inode = file_inode(file); + if (inode_permission(inode, MAY_READ) < 0) { + struct user_namespace *old, *user_ns; bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + + /* Ensure mm->user_ns contains the executable */ + user_ns = old = bprm->mm->user_ns; + while ((user_ns != &init_user_ns) && + !privileged_wrt_inode_uidgid(user_ns, inode)) + user_ns = user_ns->parent; + + if (old != user_ns) { + bprm->mm->user_ns = get_user_ns(user_ns); + put_user_ns(old); + } + } } EXPORT_SYMBOL(would_dump); @@ -1306,7 +1320,6 @@ void setup_new_exec(struct linux_binprm * bprm) !gid_eq(bprm->cred->gid, current_egid())) { current->pdeath_signal = 0; } else { - would_dump(bprm, bprm->file); if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) set_dumpable(current->mm, suid_dumpable); } @@ -1741,6 +1754,8 @@ static int do_execveat_common(int fd, struct filename *filename, if (retval < 0) goto out; + would_dump(bprm, bprm->file); + retval = exec_binprm(bprm); if (retval < 0) goto out; diff --git a/include/linux/capability.h b/include/linux/capability.h index dbc21c719ce6..6ce3eefe2769 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -240,6 +240,7 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) return true; } #endif /* CONFIG_MULTIUSER */ +extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); diff --git a/kernel/capability.c b/kernel/capability.c index 00411c82dac5..c020c0047983 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -456,6 +456,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns, } EXPORT_SYMBOL(file_ns_capable); +/** + * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode? + * @ns: The user namespace in question + * @inode: The inode in question + * + * Return true if the inode uid and gid are within the namespace. + */ +bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode) +{ + return kuid_has_mapping(ns, inode->i_uid) && + kgid_has_mapping(ns, inode->i_gid); +} + /** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question @@ -469,7 +482,6 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) && - kgid_has_mapping(ns, inode->i_gid); + return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); -- GitLab From c1df5a63716b0f73e95b1b26d878691e6175ea34 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 21 Dec 2016 16:26:24 +1100 Subject: [PATCH 0033/1442] fs: exec: apply CLOEXEC before changing dumpable task flags commit 613cc2b6f272c1a8ad33aefa21cad77af23139f7 upstream. If you have a process that has set itself to be non-dumpable, and it then undergoes exec(2), any CLOEXEC file descriptors it has open are "exposed" during a race window between the dumpable flags of the process being reset for exec(2) and CLOEXEC being applied to the file descriptors. This can be exploited by a process by attempting to access /proc//fd/... during this window, without requiring CAP_SYS_PTRACE. The race in question is after set_dumpable has been (for get_link, though the trace is basically the same for readlink): [vfs] -> proc_pid_link_inode_operations.get_link -> proc_pid_get_link -> proc_fd_access_allowed -> ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); Which will return 0, during the race window and CLOEXEC file descriptors will still be open during this window because do_close_on_exec has not been called yet. As a result, the ordering of these calls should be reversed to avoid this race window. This is of particular concern to container runtimes, where joining a PID namespace with file descriptors referring to the host filesystem can result in security issues (since PRCTL_SET_DUMPABLE doesn't protect against access of CLOEXEC file descriptors -- file descriptors which may reference filesystem objects the container shouldn't have access to). Cc: dev@opencontainers.org Reported-by: Michael Crosby Signed-off-by: Aleksa Sarai Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 669c09bfdcad..a9dec2f540dc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -19,7 +19,7 @@ * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary - * formats. + * formats. */ #include @@ -1266,6 +1266,13 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); current->personality &= ~bprm->per_clear; + /* + * We have to apply CLOEXEC before we change whether the process is + * dumpable (in setup_new_exec) to avoid a race with a process in userspace + * trying to access the should-be-closed file descriptors of a process + * undergoing exec(2). + */ + do_close_on_exec(current->files); return 0; out: @@ -1328,7 +1335,6 @@ void setup_new_exec(struct linux_binprm * bprm) group */ current->self_exec_id++; flush_signal_handlers(current, 0); - do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); -- GitLab From b6cce9b8e8139e8b9130dd2efe950bac0d4fea50 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 21 Dec 2016 10:59:34 -0800 Subject: [PATCH 0034/1442] splice: reinstate SIGPIPE/EPIPE handling commit 52bce91165e5f2db422b2b972e83d389e5e4725c upstream. Commit 8924feff66f3 ("splice: lift pipe_lock out of splice_to_pipe()") caused a regression when there were no more readers left on a pipe that was being spliced into: rather than the expected SIGPIPE and -EPIPE return value, the writer would end up waiting forever for space to free up (which obviously was not going to happen with no readers around). Fixes: 8924feff66f3 ("splice: lift pipe_lock out of splice_to_pipe()") Reported-and-tested-by: Andreas Schwab Debugged-by: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/splice.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 5a7750bd2eea..63b8f54485dc 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1086,7 +1086,13 @@ EXPORT_SYMBOL(do_splice_direct); static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) { - while (pipe->nrbufs == pipe->buffers) { + for (;;) { + if (unlikely(!pipe->readers)) { + send_sig(SIGPIPE, current, 0); + return -EPIPE; + } + if (pipe->nrbufs != pipe->buffers) + return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) @@ -1095,7 +1101,6 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) pipe_wait(pipe); pipe->waiting_writers--; } - return 0; } static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, -- GitLab From cfa2d65b2622d14b2f1368fbc9a6b4ab85141644 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 Dec 2016 08:21:51 -0700 Subject: [PATCH 0035/1442] block_dev: don't test bdev->bd_contains when it is not stable commit bcc7f5b4bee8e327689a4d994022765855c807ff upstream. bdev->bd_contains is not stable before calling __blkdev_get(). When __blkdev_get() is called on a parition with ->bd_openers == 0 it sets bdev->bd_contains = bdev; which is not correct for a partition. After a call to __blkdev_get() succeeds, ->bd_openers will be > 0 and then ->bd_contains is stable. When FMODE_EXCL is used, blkdev_get() calls bd_start_claiming() -> bd_prepare_to_claim() -> bd_may_claim() This call happens before __blkdev_get() is called, so ->bd_contains is not stable. So bd_may_claim() cannot safely use ->bd_contains. It currently tries to use it, and this can lead to a BUG_ON(). This happens when a whole device is already open with a bd_holder (in use by dm in my particular example) and two threads race to open a partition of that device for the first time, one opening with O_EXCL and one without. The thread that doesn't use O_EXCL gets through blkdev_get() to __blkdev_get(), gains the ->bd_mutex, and sets bdev->bd_contains = bdev; Immediately thereafter the other thread, using FMODE_EXCL, calls bd_start_claiming() from blkdev_get(). This should fail because the whole device has a holder, but because bdev->bd_contains == bdev bd_may_claim() incorrectly reports success. This thread continues and blocks on bd_mutex. The first thread then sets bdev->bd_contains correctly and drops the mutex. The thread using FMODE_EXCL then continues and when it calls bd_may_claim() again in: BUG_ON(!bd_may_claim(bdev, whole, holder)); The BUG_ON fires. Fix this by removing the dependency on ->bd_contains in bd_may_claim(). As bd_may_claim() has direct access to the whole device, it can simply test if the target bdev is the whole device. Fixes: 6b4517a7913a ("block: implement bd_claiming and claiming block") Signed-off-by: NeilBrown Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 05b553368bb4..9166b9f63d33 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -832,7 +832,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, return true; /* already a holder */ else if (bdev->bd_holder != NULL) return false; /* held by someone else */ - else if (bdev->bd_contains == bdev) + else if (whole == bdev) return true; /* is a whole device which isn't held */ else if (whole->bd_holder == bd_may_claim) -- GitLab From 694a95fa6dae4991f16cda333d897ea063021fed Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Oct 2016 21:23:16 -0500 Subject: [PATCH 0036/1442] mm: Add a user_ns owner to mm_struct and fix ptrace permission checks commit bfedb589252c01fa505ac9f6f2a3d5d68d707ef4 upstream. During exec dumpable is cleared if the file that is being executed is not readable by the user executing the file. A bug in ptrace_may_access allows reading the file if the executable happens to enter into a subordinate user namespace (aka clone(CLONE_NEWUSER), unshare(CLONE_NEWUSER), or setns(fd, CLONE_NEWUSER). This problem is fixed with only necessary userspace breakage by adding a user namespace owner to mm_struct, captured at the time of exec, so it is clear in which user namespace CAP_SYS_PTRACE must be present in to be able to safely give read permission to the executable. The function ptrace_may_access is modified to verify that the ptracer has CAP_SYS_ADMIN in task->mm->user_ns instead of task->cred->user_ns. This ensures that if the task changes it's cred into a subordinate user namespace it does not become ptraceable. The function ptrace_attach is modified to only set PT_PTRACE_CAP when CAP_SYS_PTRACE is held over task->mm->user_ns. The intent of PT_PTRACE_CAP is to be a flag to note that whatever permission changes the task might go through the tracer has sufficient permissions for it not to be an issue. task->cred->user_ns is always the same as or descendent of mm->user_ns. Which guarantees that having CAP_SYS_PTRACE over mm->user_ns is the worst case for the tasks credentials. To prevent regressions mm->dumpable and mm->user_ns are not considered when a task has no mm. As simply failing ptrace_may_attach causes regressions in privileged applications attempting to read things such as /proc//stat Acked-by: Kees Cook Tested-by: Cyrill Gorcunov Fixes: 8409cca70561 ("userns: allow ptrace from non-init user namespaces") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- include/linux/mm_types.h | 1 + kernel/fork.c | 9 ++++++--- kernel/ptrace.c | 26 +++++++++++--------------- mm/init-mm.c | 2 ++ 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4a8acedf4b7d..08d947fc4c59 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -473,6 +473,7 @@ struct mm_struct { */ struct task_struct __rcu *owner; #endif + struct user_namespace *user_ns; /* store ref to file /proc//exe symlink points to */ struct file __rcu *exe_file; diff --git a/kernel/fork.c b/kernel/fork.c index 997ac1d584f7..ba8a01564985 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -745,7 +745,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, + struct user_namespace *user_ns) { mm->mmap = NULL; mm->mm_rb = RB_ROOT; @@ -785,6 +786,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) if (init_new_context(p, mm)) goto fail_nocontext; + mm->user_ns = get_user_ns(user_ns); return mm; fail_nocontext: @@ -830,7 +832,7 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - return mm_init(mm, current); + return mm_init(mm, current, current_user_ns()); } /* @@ -845,6 +847,7 @@ void __mmdrop(struct mm_struct *mm) destroy_context(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); + put_user_ns(mm->user_ns); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1126,7 +1129,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); - if (!mm_init(mm, tsk)) + if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; err = dup_mmap(mm, oldmm); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e6474f7272ec..282821557183 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -220,7 +220,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; - int dumpable = 0; + struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; @@ -271,16 +271,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) return -EPERM; ok: rcu_read_unlock(); - smp_rmb(); - if (task->mm) - dumpable = get_dumpable(task->mm); - rcu_read_lock(); - if (dumpable != SUID_DUMP_USER && - !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { - rcu_read_unlock(); - return -EPERM; - } - rcu_read_unlock(); + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; return security_ptrace_access_check(task, mode); } @@ -331,6 +326,11 @@ static int ptrace_attach(struct task_struct *task, long request, task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); + if (!retval) { + struct mm_struct *mm = task->mm; + if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE)) + flags |= PT_PTRACE_CAP; + } task_unlock(task); if (retval) goto unlock_creds; @@ -344,10 +344,6 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) flags |= PT_SEIZED; - rcu_read_lock(); - if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - rcu_read_unlock(); task->ptrace = flags; __ptrace_link(task, current); diff --git a/mm/init-mm.c b/mm/init-mm.c index a56a851908d2..975e49f00f34 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -21,5 +22,6 @@ struct mm_struct init_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), + .user_ns = &init_user_ns, INIT_MM_CONTEXT(init_mm) }; -- GitLab From 48466c4772d2cfeb331395043f7edd8c5324ef1b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 14 Dec 2016 12:45:25 -0800 Subject: [PATCH 0037/1442] vfs,mm: fix return value of read() at s_maxbytes commit d05c5f7ba164aed3db02fb188c26d0dd94f5455b upstream. We truncated the possible read iterator to s_maxbytes in commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()"), but our end condition handling was wrong: it's not an error to try to read at the end of the file. Reading past the end should return EOF (0), not EINVAL. See for example https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1649342 http://lists.gnu.org/archive/html/bug-coreutils/2016-12/msg00008.html where a md5sum of a maximally sized file fails because the final read is exactly at s_maxbytes. Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Reported-by: Joseph Salisbury Cc: Wei Fang Cc: Christoph Hellwig Cc: Dave Chinner Cc: Al Viro Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 50b52fe51937..9a50acecc473 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1686,7 +1686,7 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, int error = 0; if (unlikely(*ppos >= inode->i_sb->s_maxbytes)) - return -EINVAL; + return 0; iov_iter_truncate(iter, inode->i_sb->s_maxbytes); index = *ppos >> PAGE_SHIFT; -- GitLab From e747b4ae3b6bca205d82e86366e140cdcbfb7731 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 14 Nov 2016 18:48:07 -0600 Subject: [PATCH 0038/1442] ptrace: Capture the ptracer's creds not PT_PTRACE_CAP commit 64b875f7ac8a5d60a4e191479299e931ee949b67 upstream. When the flag PT_PTRACE_CAP was added the PTRACE_TRACEME path was overlooked. This can result in incorrect behavior when an application like strace traces an exec of a setuid executable. Further PT_PTRACE_CAP does not have enough information for making good security decisions as it does not report which user namespace the capability is in. This has already allowed one mistake through insufficient granulariy. I found this issue when I was testing another corner case of exec and discovered that I could not get strace to set PT_PTRACE_CAP even when running strace as root with a full set of caps. This change fixes the above issue with strace allowing stracing as root a setuid executable without disabling setuid. More fundamentaly this change allows what is allowable at all times, by using the correct information in it's decision. Fixes: 4214e42f96d4 ("v2.4.9.11 -> v2.4.9.12") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 2 +- include/linux/capability.h | 1 + include/linux/ptrace.h | 1 - include/linux/sched.h | 1 + kernel/capability.c | 20 ++++++++++++++++++++ kernel/ptrace.c | 12 +++++++----- 6 files changed, 30 insertions(+), 7 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index a9dec2f540dc..67e86571685a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1425,7 +1425,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) unsigned n_fs; if (p->ptrace) { - if (p->ptrace & PT_PTRACE_CAP) + if (ptracer_capable(p, current_user_ns())) bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; else bprm->unsafe |= LSM_UNSAFE_PTRACE; diff --git a/include/linux/capability.h b/include/linux/capability.h index 6ce3eefe2769..6ffb67e10c06 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -243,6 +243,7 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); +extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 504c98a278d4..e13bfdf7f314 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -19,7 +19,6 @@ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ -#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e9c009dc3a4a..75d9a57e212e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1656,6 +1656,7 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ + const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task diff --git a/kernel/capability.c b/kernel/capability.c index c020c0047983..4984e1f552eb 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -485,3 +485,23 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); + +/** + * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace + * @tsk: The task that may be ptraced + * @ns: The user namespace to search for CAP_SYS_PTRACE in + * + * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE + * in the specified user namespace. + */ +bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) +{ + int ret = 0; /* An absent tracer adds no restrictions */ + const struct cred *cred; + rcu_read_lock(); + cred = rcu_dereference(tsk->ptracer_cred); + if (cred) + ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); + rcu_read_unlock(); + return (ret == 0); +} diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 282821557183..e82c15cadd6d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -39,6 +39,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) BUG_ON(!list_empty(&child->ptrace_entry)); list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; + rcu_read_lock(); + child->ptracer_cred = get_cred(__task_cred(new_parent)); + rcu_read_unlock(); } /** @@ -71,12 +74,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) */ void __ptrace_unlink(struct task_struct *child) { + const struct cred *old_cred; BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->parent = child->real_parent; list_del_init(&child->ptrace_entry); + old_cred = child->ptracer_cred; + child->ptracer_cred = NULL; + put_cred(old_cred); spin_lock(&child->sighand->siglock); child->ptrace = 0; @@ -326,11 +333,6 @@ static int ptrace_attach(struct task_struct *task, long request, task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); - if (!retval) { - struct mm_struct *mm = task->mm; - if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - } task_unlock(task); if (retval) goto unlock_creds; -- GitLab From e71b4e061c9677cef1f1f38fd7236e198fab1287 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 22 Nov 2016 12:06:50 -0600 Subject: [PATCH 0039/1442] ptrace: Don't allow accessing an undumpable mm commit 84d77d3f06e7e8dea057d10e8ec77ad71f721be3 upstream. It is the reasonable expectation that if an executable file is not readable there will be no way for a user without special privileges to read the file. This is enforced in ptrace_attach but if ptrace is already attached before exec there is no enforcement for read-only executables. As the only way to read such an mm is through access_process_vm spin a variant called ptrace_access_vm that will fail if the target process is not being ptraced by the current process, or the current process did not have sufficient privileges when ptracing began to read the target processes mm. In the ptrace implementations replace access_process_vm by ptrace_access_vm. There remain several ptrace sites that still use access_process_vm as they are reading the target executables instructions (for kernel consumption) or register stacks. As such it does not appear necessary to add a permission check to those calls. This bug has always existed in Linux. Fixes: v1.0 Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/alpha/kernel/ptrace.c | 2 +- arch/blackfin/kernel/ptrace.c | 4 +-- arch/cris/arch-v32/kernel/ptrace.c | 2 +- arch/ia64/kernel/ptrace.c | 2 +- arch/mips/kernel/ptrace32.c | 4 +-- arch/powerpc/kernel/ptrace32.c | 4 +-- include/linux/mm.h | 2 ++ include/linux/ptrace.h | 3 +++ kernel/ptrace.c | 42 +++++++++++++++++++++++++----- mm/memory.c | 2 +- mm/nommu.c | 2 +- 11 files changed, 52 insertions(+), 17 deletions(-) diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 940dfb406591..04abdec7f496 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -283,7 +283,7 @@ long arch_ptrace(struct task_struct *child, long request, /* When I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); ret = -EIO; if (copied != sizeof(tmp)) diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 8d79286ee4e8..360d99645163 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -270,7 +270,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (bfin_mem_access_type(addr, to_copy)) { case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: - copied = access_process_vm(child, addr, &tmp, + copied = ptrace_access_vm(child, addr, &tmp, to_copy, FOLL_FORCE); if (copied) break; @@ -323,7 +323,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (bfin_mem_access_type(addr, to_copy)) { case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: - copied = access_process_vm(child, addr, &data, + copied = ptrace_access_vm(child, addr, &data, to_copy, FOLL_FORCE | FOLL_WRITE); break; diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f0df654ac6fc..fe1f9cf7b391 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request, /* The trampoline page is globally mapped, no page table to traverse.*/ tmp = *(unsigned long*)addr; } else { - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 31aa8c0f68e1..36f660da8124 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1159,7 +1159,7 @@ arch_ptrace (struct task_struct *child, long request, case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: /* read word at location addr */ - if (access_process_vm(child, addr, &data, sizeof(data), + if (ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE) != sizeof(data)) return -EIO; diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 7e71a4e0281b..5fcbdcd7abd0 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -69,7 +69,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &data, + if (ptrace_access_vm(child, (u64)addrOthers, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE) == sizeof(data)) break; diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 010b7b310237..1e887f3a61a6 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &tmp, + if (ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; diff --git a/include/linux/mm.h b/include/linux/mm.h index a92c8d73aeaf..0b5b2e4df14e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1270,6 +1270,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); +extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index e13bfdf7f314..e0e539321ab9 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -8,6 +8,9 @@ #include /* For task_active_pid_ns. */ #include +extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags); + /* * Ptrace flags * diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e82c15cadd6d..49ba7c1ade9d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -27,6 +27,35 @@ #include #include +/* + * Access another process' address space via ptrace. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + if (!tsk->ptrace || + (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return 0; + } + + ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); + mmput(mm); + + return ret; +} + /* * ptrace a task: make the debugger its new parent and @@ -535,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; - retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE); + retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE); + if (!retval) { if (copied) break; @@ -562,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds this_len = (len > sizeof(buf)) ? sizeof(buf) : len; if (copy_from_user(buf, src, this_len)) return -EFAULT; - retval = access_process_vm(tsk, dst, buf, this_len, + retval = ptrace_access_vm(tsk, dst, buf, this_len, FOLL_FORCE | FOLL_WRITE); if (!retval) { if (copied) @@ -1126,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long tmp; int copied; - copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) return -EIO; return put_user(tmp, (unsigned long __user *)data); @@ -1137,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, { int copied; - copied = access_process_vm(tsk, addr, &data, sizeof(data), + copied = ptrace_access_vm(tsk, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(data)) ? 0 : -EIO; } @@ -1155,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: - ret = access_process_vm(child, addr, &word, sizeof(word), + ret = ptrace_access_vm(child, addr, &word, sizeof(word), FOLL_FORCE); if (ret != sizeof(word)) ret = -EIO; @@ -1165,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, case PTRACE_POKETEXT: case PTRACE_POKEDATA: - ret = access_process_vm(child, addr, &data, sizeof(data), + ret = ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); ret = (ret != sizeof(data) ? -EIO : 0); break; diff --git a/mm/memory.c b/mm/memory.c index e18c57bdc75c..cbb1e5e5f791 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3868,7 +3868,7 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * Access another process' address space as given in mm. If non-NULL, use the * given task for page fault accounting. */ -static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, +int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; diff --git a/mm/nommu.c b/mm/nommu.c index 8b8faaf2a9e9..44265e00b701 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1808,7 +1808,7 @@ void filemap_map_pages(struct fault_env *fe, } EXPORT_SYMBOL(filemap_map_pages); -static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, +int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; -- GitLab From 21cc91554c3da12708c9a85a98ce144409dc21ef Mon Sep 17 00:00:00 2001 From: Alex Porosanu Date: Wed, 9 Nov 2016 10:46:11 +0200 Subject: [PATCH 0040/1442] crypto: caam - fix AEAD givenc descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d128af17876d79b87edf048303f98b35f6a53dbc upstream. The AEAD givenc descriptor relies on moving the IV through the output FIFO and then back to the CTX2 for authentication. The SEQ FIFO STORE could be scheduled before the data can be read from OFIFO, especially since the SEQ FIFO LOAD needs to wait for the SEQ FIFO LOAD SKIP to finish first. The SKIP takes more time when the input is SG than when it's a contiguous buffer. If the SEQ FIFO LOAD is not scheduled before the STORE, the DECO will hang waiting for data to be available in the OFIFO so it can be transferred to C2. In order to overcome this, first force transfer of IV to C2 by starting the "cryptlen" transfer first and then starting to store data from OFIFO to the output buffer. Fixes: 1acebad3d8db8 ("crypto: caam - faster aead implementation") Signed-off-by: Alex Porosanu Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamalg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 954a64c7757b..c310318b34dd 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -736,7 +736,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* Will read cryptlen */ append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF | + FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); /* Write ICV */ append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | -- GitLab From 24d1251a5d83ee121d978f3e7cb5c19ffb4272b9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 13 Nov 2016 22:02:29 -0500 Subject: [PATCH 0041/1442] ext4: don't lock buffer in ext4_commit_super if holding spinlock commit 1566a48aaa10c6bb29b9a69dd8279f9a4fc41e35 upstream. If there is an error reported in mballoc via ext4_grp_locked_error(), the code is holding a spinlock, so ext4_commit_super() must not try to lock the buffer head, or else it will trigger a BUG: BUG: sleeping function called from invalid context at ./include/linux/buffer_head.h:358 in_atomic(): 1, irqs_disabled(): 0, pid: 993, name: mount CPU: 0 PID: 993 Comm: mount Not tainted 4.9.0-rc1-clouder1 #62 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 ffff880006423548 ffffffff81318c89 ffffffff819ecdd0 0000000000000166 ffff880006423558 ffffffff810810b0 ffff880006423580 ffffffff81081153 ffff880006e5a1a0 ffff88000690e400 0000000000000000 ffff8800064235c0 Call Trace: [] dump_stack+0x67/0x9e [] ___might_sleep+0xf0/0x140 [] __might_sleep+0x53/0xb0 [] ext4_commit_super+0x19c/0x290 [] __ext4_grp_locked_error+0x14a/0x230 [] ? __might_sleep+0x53/0xb0 [] ext4_mb_generate_buddy+0x1de/0x320 Since ext4_grp_locked_error() calls ext4_commit_super with sync == 0 (and it is the only caller which does so), avoid locking and unlocking the buffer in this case. This can result in races with ext4_commit_super() if there are other problems (which is what commit 4743f83990614 was trying to address), but a Warning is better than BUG. Fixes: 4743f83990614 Reported-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 52b0530c5d65..5a60eecf4deb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4550,7 +4550,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) &EXT4_SB(sb)->s_freeinodes_counter)); BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); - lock_buffer(sbh); + if (sync) + lock_buffer(sbh); if (buffer_write_io_error(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -4566,8 +4567,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) set_buffer_uptodate(sbh); } mark_buffer_dirty(sbh); - unlock_buffer(sbh); if (sync) { + unlock_buffer(sbh); error = __sync_dirty_buffer(sbh, test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC); if (error) -- GitLab From c3881abae6e7060d17beaa4f2af3190ff0937fd2 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Mon, 14 Nov 2016 21:04:37 -0500 Subject: [PATCH 0042/1442] ext4: fix mballoc breakage with 64k block size commit 69e43e8cc971a79dd1ee5d4343d8e63f82725123 upstream. 'border' variable is set to a value of 2 times the block size of the underlying filesystem. With 64k block size, the resulting value won't fit into a 16-bit variable. Hence this commit changes the data type of 'border' to 'unsigned int'. Fixes: c9de560ded61f Signed-off-by: Chandan Rajendra Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f418f55c2bbe..a937ac7ef99f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -669,7 +669,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, ext4_grpblk_t min; ext4_grpblk_t max; ext4_grpblk_t chunk; - unsigned short border; + unsigned int border; BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); -- GitLab From b493c715cdce57c803413e635e19f34f96041430 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Mon, 14 Nov 2016 21:26:26 -0500 Subject: [PATCH 0043/1442] ext4: fix stack memory corruption with 64k block size commit 30a9d7afe70ed6bd9191d3000e2ef1a34fb58493 upstream. The number of 'counters' elements needed in 'struct sg' is super_block->s_blocksize_bits + 2. Presently we have 16 'counters' elements in the array. This is insufficient for block sizes >= 32k. In such cases the memcpy operation performed in ext4_mb_seq_groups_show() would cause stack memory corruption. Fixes: c9de560ded61f Signed-off-by: Chandan Rajendra Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a937ac7ef99f..7ae43c59bc79 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2287,7 +2287,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) struct ext4_group_info *grinfo; struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[16]; + ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; } sg; group--; -- GitLab From 01772f4683a9f92a06b3d2467c0084df2a512f35 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:28:30 -0500 Subject: [PATCH 0044/1442] ext4: use more strict checks for inodes_per_block on mount commit cd6bb35bf7f6d7d922509bf50265383a0ceabe96 upstream. Centralize the checks for inodes_per_block and be more strict to make sure the inodes_per_block_group can't end up being zero. Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5a60eecf4deb..2113aad08c89 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3660,12 +3660,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) - goto cantfind_ext4; sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0) goto cantfind_ext4; + if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || + sbi->s_inodes_per_group > blocksize * 8) { + ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", + sbi->s_blocks_per_group); + goto failed_mount; + } sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); @@ -3748,13 +3752,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } sbi->s_cluster_ratio = clustersize / blocksize; - if (sbi->s_inodes_per_group > blocksize * 8) { - ext4_msg(sb, KERN_ERR, - "#inodes per group too big: %lu", - sbi->s_inodes_per_group); - goto failed_mount; - } - /* Do we have standard group size of clustersize * 8 blocks ? */ if (sbi->s_blocks_per_group == clustersize << 3) set_opt2(sb, STD_GROUP_SIZE); -- GitLab From 956e2a0e6779824877822864ad486f08184f1a3e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:24:26 -0500 Subject: [PATCH 0045/1442] ext4: fix in-superblock mount options processing commit 5aee0f8a3f42c94c5012f1673420aee96315925a upstream. Fix a large number of problems with how we handle mount options in the superblock. For one, if the string in the superblock is long enough that it is not null terminated, we could run off the end of the string and try to interpret superblocks fields as characters. It's unlikely this will cause a security problem, but it could result in an invalid parse. Also, parse_options is destructive to the string, so in some cases if there is a comma-separated string, it would be modified in the superblock. (Fortunately it only happens on file systems with a 1k block size.) Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2113aad08c89..74596847c927 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3301,7 +3301,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) char *orig_data = kstrdup(data, GFP_KERNEL); struct buffer_head *bh; struct ext4_super_block *es = NULL; - struct ext4_sb_info *sbi; + struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -3320,16 +3320,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; ext4_group_t first_not_zeroed; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - goto out_free_orig; + if ((data && !orig_data) || !sbi) + goto out_free_base; sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); - if (!sbi->s_blockgroup_lock) { - kfree(sbi); - goto out_free_orig; - } + if (!sbi->s_blockgroup_lock) + goto out_free_base; + sb->s_fs_info = sbi; sbi->s_sb = sb; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; @@ -3475,11 +3473,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, - &journal_devnum, &journal_ioprio, 0)) { - ext4_msg(sb, KERN_WARNING, - "failed to parse options in superblock: %s", - sbi->s_es->s_mount_opts); + if (sbi->s_es->s_mount_opts[0]) { + char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, + sizeof(sbi->s_es->s_mount_opts), + GFP_KERNEL); + if (!s_mount_opts) + goto failed_mount; + if (!parse_options(s_mount_opts, sb, &journal_devnum, + &journal_ioprio, 0)) { + ext4_msg(sb, KERN_WARNING, + "failed to parse options in superblock: %s", + s_mount_opts); + } + kfree(s_mount_opts); } sbi->s_def_mount_opt = sbi->s_mount_opt; if (!parse_options((char *) data, sb, &journal_devnum, @@ -4157,7 +4163,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, + "Opts: %.*s%s%s", descr, + (int) sizeof(sbi->s_es->s_mount_opts), + sbi->s_es->s_mount_opts, *sbi->s_es->s_mount_opts ? "; " : "", orig_data); if (es->s_error_count) @@ -4236,8 +4244,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) out_fail: sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); +out_free_base: kfree(sbi); -out_free_orig: kfree(orig_data); return err ? err : ret; } -- GitLab From 8084f57bc46894dbe08dac2a09d66553d7513a34 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:37:47 -0500 Subject: [PATCH 0046/1442] ext4: add sanity checking to count_overhead() commit c48ae41bafe31e9a66d8be2ced4e42a6b57fa814 upstream. The commit "ext4: sanity check the block and cluster size at mount time" should prevent any problems, but in case the superblock is modified while the file system is mounted, add an extra safety check to make sure we won't overrun the allocated buffer. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 74596847c927..334beea19883 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3193,10 +3193,15 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_set_bit(s++, buf); count++; } - for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { - ext4_set_bit(EXT4_B2C(sbi, s++), buf); - count++; + j = ext4_bg_num_gdb(sb, grp); + if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) { + ext4_error(sb, "Invalid number of block group " + "descriptor blocks: %d", j); + j = EXT4_BLOCKS_PER_GROUP(sb) - s; } + count += j; + for (; j > 0; j--) + ext4_set_bit(EXT4_B2C(sbi, s++), buf); } if (!count) return 0; -- GitLab From 3e4f8da9d177b447fdd5e82c94f66bac38aa3045 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 10 Dec 2016 09:55:01 -0500 Subject: [PATCH 0047/1442] ext4: reject inodes with negative size commit 7e6e1ef48fc02f3ac5d0edecbb0c6087cd758d58 upstream. Don't load an inode with a negative size; this causes integer overflow problems in the VFS. [ Added EXT4_ERROR_INODE() to mark file system as corrupted. -TYT] Fixes: a48380f769df (ext4: rename i_dir_acl to i_size_high) Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c064727ed62..33a509c876ee 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4434,6 +4434,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct inode *inode; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; + loff_t size; int block; uid_t i_uid; gid_t i_gid; @@ -4534,6 +4535,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); + if ((size = i_size_read(inode)) < 0) { + EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; -- GitLab From acf3efd6f0037582209cc589392a06ed26d9032b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 10 Dec 2016 09:56:01 -0500 Subject: [PATCH 0048/1442] ext4: return -ENOMEM instead of success commit 578620f451f836389424833f1454eeeb2ffc9e9f upstream. We should set the error code if kzalloc() fails. Fixes: 67cf5b09a46f ("ext4: add the basic function for inline data support") Signed-off-by: Dan Carpenter Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f74d5ee2cdec..d8ca4b9f9dd6 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -336,8 +336,10 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, len -= EXT4_MIN_INLINE_DATA_SIZE; value = kzalloc(len, GFP_NOFS); - if (!value) + if (!value) { + error = -ENOMEM; goto out; + } error = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, len); -- GitLab From 9abce3ca80a735218854cd5ad4b3cc567ed1b9c9 Mon Sep 17 00:00:00 2001 From: Sergey Karamov Date: Sat, 10 Dec 2016 17:54:58 -0500 Subject: [PATCH 0049/1442] ext4: do not perform data journaling when data is encrypted commit 73b92a2a5e97d17cc4d5c4fe9d724d3273fb6fd2 upstream. Currently data journalling is incompatible with encryption: enabling both at the same time has never been supported by design, and would result in unpredictable behavior. However, users are not precluded from turning on both features simultaneously. This change programmatically replaces data journaling for encrypted regular files with ordered data journaling mode. Background: Journaling encrypted data has not been supported because it operates on buffer heads of the page in the page cache. Namely, when the commit happens, which could be up to five seconds after caching, the commit thread uses the buffer heads attached to the page to copy the contents of the page to the journal. With encryption, it would have been required to keep the bounce buffer with ciphertext for up to the aforementioned five seconds, since the page cache can only hold plaintext and could not be used for journaling. Alternatively, it would be required to setup the journal to initiate a callback at the commit time to perform deferred encryption - in this case, not only would the data have to be written twice, but it would also have to be encrypted twice. This level of complexity was not justified for a mode that in practice is very rarely used because of the overhead from the data journalling. Solution: If data=journaled has been set as a mount option for a filesystem, or if journaling is enabled on a regular file, do not perform journaling if the file is also encrypted, instead fall back to the data=ordered mode for the file. Rationale: The intent is to allow seamless and proper filesystem operation when journaling and encryption have both been enabled, and have these two conflicting features gracefully resolved by the filesystem. Fixes: 4461471107b7 Signed-off-by: Sergey Karamov Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4_jbd2.h | 14 ++++++++------ fs/ext4/super.c | 5 +++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index b1d52c14098e..f97611171023 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -414,17 +414,19 @@ static inline int ext4_inode_journal_mode(struct inode *inode) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ /* We do not support data journalling with delayed allocation */ if (!S_ISREG(inode->i_mode) || - test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && - !test_opt(inode->i_sb, DELALLOC)) + test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && + !test_opt(inode->i_sb, DELALLOC))) { + /* We do not support data journalling for encrypted data */ + if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode)) + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ + } if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ - else - BUG(); + BUG(); } static inline int ext4_should_journal_data(struct inode *inode) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 334beea19883..478630af0d19 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3516,6 +3516,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "both data=journal and dax"); goto failed_mount; } + if (ext4_has_feature_encrypt(sb)) { + ext4_msg(sb, KERN_WARNING, + "encrypted files will use data=ordered " + "instead of data journaling mode"); + } if (test_opt(sb, DELALLOC)) clear_opt(sb, DELALLOC); } else { -- GitLab From a43e1c459a3dc6e2d44ae53c7f5b107006663cde Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 2 Dec 2016 15:11:32 -0800 Subject: [PATCH 0050/1442] Revert "f2fs: use percpu_counter for # of dirty pages in inode" commit 204706c7accfabb67b97eef9f9a28361b6201199 upstream. This reverts commit 1beba1b3a953107c3ff5448ab4e4297db4619c76. The perpcu_counter doesn't provide atomicity in single core and consume more DRAM. That incurs fs_mark test failure due to ENOMEM. Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/f2fs.h | 10 +++++----- fs/f2fs/file.c | 2 +- fs/f2fs/super.c | 7 +------ 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9e8de18a168a..830c51b36089 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -428,7 +428,7 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags; /* use to pass per-file flags */ struct rw_semaphore i_sem; /* protect fi info */ - struct percpu_counter dirty_pages; /* # of dirty pages */ + atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ @@ -1242,7 +1242,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) static inline void inode_inc_dirty_pages(struct inode *inode) { - percpu_counter_inc(&F2FS_I(inode)->dirty_pages); + atomic_inc(&F2FS_I(inode)->dirty_pages); inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } @@ -1258,7 +1258,7 @@ static inline void inode_dec_dirty_pages(struct inode *inode) !S_ISLNK(inode->i_mode)) return; - percpu_counter_dec(&F2FS_I(inode)->dirty_pages); + atomic_dec(&F2FS_I(inode)->dirty_pages); dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } @@ -1268,9 +1268,9 @@ static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) return percpu_counter_sum_positive(&sbi->nr_pages[count_type]); } -static inline s64 get_dirty_pages(struct inode *inode) +static inline int get_dirty_pages(struct inode *inode) { - return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages); + return atomic_read(&F2FS_I(inode)->dirty_pages); } static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c7865073cd26..4d6fbdb7ab9b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1526,7 +1526,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) goto out; f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Unexpected flush for atomic writes: ino=%lu, npages=%lld", + "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6132b4ce4e4c..8021d35df7b0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -558,13 +558,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_once((void *) fi); - if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) { - kmem_cache_free(f2fs_inode_cachep, fi); - return NULL; - } - /* Initialize f2fs-specific inode info */ fi->vfs_inode.i_version = 1; + atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; init_rwsem(&fi->i_sem); @@ -687,7 +683,6 @@ static void f2fs_i_callback(struct rcu_head *head) static void f2fs_destroy_inode(struct inode *inode) { - percpu_counter_destroy(&F2FS_I(inode)->dirty_pages); call_rcu(&inode->i_rcu, f2fs_i_callback); } -- GitLab From 1134ef11ffffcfb69834c86f632c0ce0c639524a Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 20 Nov 2016 19:57:23 +0100 Subject: [PATCH 0051/1442] f2fs: set ->owner for debugfs status file's file_operations commit 05e6ea2685c964db1e675a24a4f4e2adc22d2388 upstream. The struct file_operations instance serving the f2fs/status debugfs file lacks an initialization of its ->owner. This means that although that file might have been opened, the f2fs module can still get removed. Any further operation on that opened file, releasing included, will cause accesses to unmapped memory. Indeed, Mike Marshall reported the following: BUG: unable to handle kernel paging request at ffffffffa0307430 IP: [] full_proxy_release+0x24/0x90 <...> Call Trace: [] __fput+0xdf/0x1d0 [] ____fput+0xe/0x10 [] task_work_run+0x8e/0xc0 [] do_exit+0x2ae/0xae0 [] ? __audit_syscall_entry+0xae/0x100 [] ? syscall_trace_enter+0x1ca/0x310 [] do_group_exit+0x44/0xc0 [] SyS_exit_group+0x14/0x20 [] do_syscall_64+0x61/0x150 [] entry_SYSCALL64_slow_path+0x25/0x25 <...> ---[ end trace f22ae883fa3ea6b8 ]--- Fixing recursive fault but reboot is needed! Fix this by initializing the f2fs/status file_operations' ->owner with THIS_MODULE. This will allow debugfs to grab a reference to the f2fs module upon any open on that file, thus preventing it from getting removed. Fixes: 902829aa0b72 ("f2fs: move proc files to debugfs") Reported-by: Mike Marshall Reported-by: Martin Brandenburg Signed-off-by: Nicolai Stange Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index fb245bd302e4..1c35e80732e0 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -373,6 +373,7 @@ static int stat_open(struct inode *inode, struct file *file) } static const struct file_operations stat_fops = { + .owner = THIS_MODULE, .open = stat_open, .read = seq_read, .llseek = seq_lseek, -- GitLab From 027611ef345d118310c1334b5d25b27ff95da684 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Nov 2016 10:51:17 -0800 Subject: [PATCH 0052/1442] f2fs: fix overflow due to condition check order commit e87f7329bbd6760c2acc4f1eb423362b08851a71 upstream. In the last ilen case, i was already increased, resulting in accessing out- of-boundary entry of do_replace and blkaddr. Fix to check ilen first to exit the loop. Fixes: 2aa8fbb9693020 ("f2fs: refactor __exchange_data_block for speed up") Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4d6fbdb7ab9b..801111e1f8ef 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -967,7 +967,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, new_size = (dst + i) << PAGE_SHIFT; if (dst_inode->i_size < new_size) f2fs_i_size_write(dst_inode, new_size); - } while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen); + } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); f2fs_put_dnode(&dn); } else { -- GitLab From 01e15b3328c4dc2073bc5080f1b31f21370be3f8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Nov 2016 12:45:15 -0800 Subject: [PATCH 0053/1442] f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack commit 8508e44ae98622f841f5ef29d0bf3d5db4e0c1cc upstream. We don't guarantee cp_addr is fixed by cp_version. This is to sync with f2fs-tools. Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/checkpoint.c | 8 +++++++- fs/f2fs/f2fs.h | 26 ++++++++++++++++---------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7e9b504bd8b2..b4dbc2f59656 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -772,6 +772,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) if (sanity_check_ckpt(sbi)) goto fail_no_cp; + if (cur_page == cp1) + sbi->cur_cp_pack = 1; + else + sbi->cur_cp_pack = 2; + if (cp_blks <= 1) goto done; @@ -1123,7 +1128,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); - start_blk = __start_cp_addr(sbi); + start_blk = __start_cp_next_addr(sbi); /* need to wait for end_io results */ wait_on_all_pages_writeback(sbi); @@ -1187,6 +1192,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_prefree_segments(sbi, cpc); clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + __set_cp_next_pack(sbi); /* * redirty superblock if metadata like node page or inode cache is diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 830c51b36089..6dd03115789b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -764,6 +764,7 @@ struct f2fs_sb_info { /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ @@ -1329,22 +1330,27 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) { - block_t start_addr; - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - unsigned long long ckpt_version = cur_cp_version(ckpt); - - start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); - /* - * odd numbered checkpoint should at cp segment 0 - * and even segment must be at cp segment 1 - */ - if (!(ckpt_version & 1)) + if (sbi->cur_cp_pack == 2) start_addr += sbi->blocks_per_seg; + return start_addr; +} +static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi) +{ + block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + + if (sbi->cur_cp_pack == 1) + start_addr += sbi->blocks_per_seg; return start_addr; } +static inline void __set_cp_next_pack(struct f2fs_sb_info *sbi) +{ + sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1; +} + static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) { return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); -- GitLab From 890c39d35eb070eccad71564f55f4910668b25c4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 14 Nov 2016 14:56:17 -0800 Subject: [PATCH 0054/1442] loop: return proper error from loop_queue_rq() commit b4a567e8114327518c09f5632339a5954ab975a3 upstream. ->queue_rq() should return one of the BLK_MQ_RQ_QUEUE_* constants, not an errno. Fixes: f4aa4c7bbac6 ("block: loop: convert to per-device workqueue") Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fa1b7a90ba11..4af818766797 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1646,7 +1646,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (lo->lo_state != Lo_bound) - return -EIO; + return BLK_MQ_RQ_QUEUE_ERROR; switch (req_op(cmd->rq)) { case REQ_OP_FLUSH: -- GitLab From fe3d462821b02df53e5c99b2c9b059824adbac0a Mon Sep 17 00:00:00 2001 From: Solganik Alexander Date: Sun, 30 Oct 2016 10:35:15 +0200 Subject: [PATCH 0055/1442] nvmet: Fix possible infinite loop triggered on hot namespace removal commit e4fcf07cca6a3b6c4be00df16f08be894325eaa3 upstream. When removing a namespace we delete it from the subsystem namespaces list with list_del_init which allows us to know if it is enabled or not. The problem is that list_del_init initialize the list next and does not respect the RCU list-traversal we do on the IO path for locating a namespace. Instead we need to use list_del_rcu which is allowed to run concurrently with the _rcu list-traversal primitives (keeps list next intact) and guarantees concurrent nvmet_find_naespace forward progress. By changing that, we cannot rely on ns->dev_link for knowing if the namspace is enabled, so add enabled indicator entry to nvmet_ns for that. Signed-off-by: Sagi Grimberg Signed-off-by: Solganik Alexander Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/configfs.c | 6 +++--- drivers/nvme/target/core.c | 14 ++++++++------ drivers/nvme/target/nvmet.h | 6 +----- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index af5e2dc4a3d5..011f88e5663e 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -271,7 +271,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, mutex_lock(&subsys->lock); ret = -EBUSY; - if (nvmet_ns_enabled(ns)) + if (ns->enabled) goto out_unlock; kfree(ns->device_path); @@ -307,7 +307,7 @@ static ssize_t nvmet_ns_device_nguid_store(struct config_item *item, int ret = 0; mutex_lock(&subsys->lock); - if (nvmet_ns_enabled(ns)) { + if (ns->enabled) { ret = -EBUSY; goto out_unlock; } @@ -339,7 +339,7 @@ CONFIGFS_ATTR(nvmet_ns_, device_nguid); static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", nvmet_ns_enabled(to_nvmet_ns(item))); + return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled); } static ssize_t nvmet_ns_enable_store(struct config_item *item, diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a21437a33adb..55ce769cecee 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -264,7 +264,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) int ret = 0; mutex_lock(&subsys->lock); - if (!list_empty(&ns->dev_link)) + if (ns->enabled) goto out_unlock; ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, @@ -309,6 +309,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); + ns->enabled = true; ret = 0; out_unlock: mutex_unlock(&subsys->lock); @@ -325,11 +326,11 @@ void nvmet_ns_disable(struct nvmet_ns *ns) struct nvmet_ctrl *ctrl; mutex_lock(&subsys->lock); - if (list_empty(&ns->dev_link)) { - mutex_unlock(&subsys->lock); - return; - } - list_del_init(&ns->dev_link); + if (!ns->enabled) + goto out_unlock; + + ns->enabled = false; + list_del_rcu(&ns->dev_link); mutex_unlock(&subsys->lock); /* @@ -351,6 +352,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns) if (ns->bdev) blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); +out_unlock: mutex_unlock(&subsys->lock); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 76b6eedccaf9..7655a351320f 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -47,6 +47,7 @@ struct nvmet_ns { loff_t size; u8 nguid[16]; + bool enabled; struct nvmet_subsys *subsys; const char *device_path; @@ -61,11 +62,6 @@ static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) return container_of(to_config_group(item), struct nvmet_ns, group); } -static inline bool nvmet_ns_enabled(struct nvmet_ns *ns) -{ - return !list_empty_careful(&ns->dev_link); -} - struct nvmet_cq { u16 qid; u16 size; -- GitLab From 04597beae7c23ccefd4502c0b9296d68292b807f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 12 Dec 2016 16:41:50 -0800 Subject: [PATCH 0056/1442] mm/vmscan.c: set correct defer count for shrinker commit 5f33a0803bbd781de916f5c7448cbbbbc763d911 upstream. Our system uses significantly more slab memory with memcg enabled with the latest kernel. With 3.10 kernel, slab uses 2G memory, while with 4.6 kernel, 6G memory is used. The shrinker has problem. Let's see we have two memcg for one shrinker. In do_shrink_slab: 1. Check cg1. nr_deferred = 0, assume total_scan = 700. batch size is 1024, then no memory is freed. nr_deferred = 700 2. Check cg2. nr_deferred = 700. Assume freeable = 20, then total_scan = 10 or 40. Let's assume it's 10. No memory is freed. nr_deferred = 10. The deferred share of cg1 is lost in this case. kswapd will free no memory even run above steps again and again. The fix makes sure one memcg's deferred share isn't lost. Link: http://lkml.kernel.org/r/2414be961b5d25892060315fbb56bb19d81d0c07.1476227351.git.shli@fb.com Signed-off-by: Shaohua Li Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index d75cdf360730..c4abf08861d2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -291,6 +291,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, int nid = shrinkctl->nid; long batch_size = shrinker->batch ? shrinker->batch : SHRINK_BATCH; + long scanned = 0, next_deferred; freeable = shrinker->count_objects(shrinker, shrinkctl); if (freeable == 0) @@ -312,7 +313,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", shrinker->scan_objects, total_scan); total_scan = freeable; - } + next_deferred = nr; + } else + next_deferred = total_scan; /* * We need to avoid excessive windup on filesystem shrinkers @@ -369,17 +372,22 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, count_vm_events(SLABS_SCANNED, nr_to_scan); total_scan -= nr_to_scan; + scanned += nr_to_scan; cond_resched(); } + if (next_deferred >= scanned) + next_deferred -= scanned; + else + next_deferred = 0; /* * move the unused scan count back into the shrinker in a * manner that handles concurrent updates. If we exhausted the * scan, there is no need to do an update. */ - if (total_scan > 0) - new_nr = atomic_long_add_return(total_scan, + if (next_deferred > 0) + new_nr = atomic_long_add_return(next_deferred, &shrinker->nr_deferred[nid]); else new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); -- GitLab From 44919a2ac4c6eca9f6e076e4485e06f251101a9a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 12 Dec 2016 16:44:41 -0800 Subject: [PATCH 0057/1442] mm, page_alloc: keep pcp count and list contents in sync if struct page is corrupted commit a6de734bc002fe2027ccc074fbbd87d72957b7a4 upstream. Vlastimil Babka pointed out that commit 479f854a207c ("mm, page_alloc: defer debugging checks of pages allocated from the PCP") will allow the per-cpu list counter to be out of sync with the per-cpu list contents if a struct page is corrupted. The consequence is an infinite loop if the per-cpu lists get fully drained by free_pcppages_bulk because all the lists are empty but the count is positive. The infinite loop occurs here do { batch_free++; if (++migratetype == MIGRATE_PCPTYPES) migratetype = 0; list = &pcp->lists[migratetype]; } while (list_empty(list)); What the user sees is a bad page warning followed by a soft lockup with interrupts disabled in free_pcppages_bulk(). This patch keeps the accounting in sync. Fixes: 479f854a207c ("mm, page_alloc: defer debugging checks of pages allocated from the PCP") Link: http://lkml.kernel.org/r/20161202112951.23346-2-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Hillf Danton Cc: Christoph Lameter Cc: Johannes Weiner Cc: Jesper Dangaard Brouer Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6de9440e3ae2..34ada718ef47 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2192,7 +2192,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, bool cold) { - int i; + int i, alloced = 0; spin_lock(&zone->lock); for (i = 0; i < count; ++i) { @@ -2217,13 +2217,21 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, else list_add_tail(&page->lru, list); list = &page->lru; + alloced++; if (is_migrate_cma(get_pcppage_migratetype(page))) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, -(1 << order)); } + + /* + * i pages were removed from the buddy list even if some leak due + * to check_pcp_refill failing so adjust NR_FREE_PAGES based + * on i. Do not confuse with 'alloced' which is the number of + * pages added to the pcp list. + */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); spin_unlock(&zone->lock); - return i; + return alloced; } #ifdef CONFIG_NUMA -- GitLab From eab1c4e2d0ad4509ccb8476a604074547dc202e0 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 28 Sep 2016 12:33:31 +0300 Subject: [PATCH 0058/1442] usb: gadget: composite: always set ep->mult to a sensible value commit eaa496ffaaf19591fe471a36cef366146eeb9153 upstream. ep->mult is supposed to be set to Isochronous and Interrupt Endapoint's multiplier value. This value is computed from different places depending on the link speed. If we're dealing with HighSpeed, then it's part of bits [12:11] of wMaxPacketSize. This case wasn't taken into consideration before. While at that, also make sure the ep->mult defaults to one so drivers can use it unconditionally and assume they'll never multiply ep->maxpacket to zero. Cc: Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 9 +++++++-- drivers/usb/gadget/function/uvc_video.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index f6a7583ab6d1..e38b21087d26 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -201,7 +201,12 @@ int config_ep_by_speed(struct usb_gadget *g, _ep->desc = chosen_desc; _ep->comp_desc = NULL; _ep->maxburst = 0; - _ep->mult = 0; + _ep->mult = 1; + + if (g->speed == USB_SPEED_HIGH && (usb_endpoint_xfer_isoc(_ep->desc) || + usb_endpoint_xfer_int(_ep->desc))) + _ep->mult = usb_endpoint_maxp(_ep->desc) & 0x7ff; + if (!want_comp_desc) return 0; @@ -218,7 +223,7 @@ int config_ep_by_speed(struct usb_gadget *g, switch (usb_endpoint_type(_ep->desc)) { case USB_ENDPOINT_XFER_ISOC: /* mult: bits 1:0 of bmAttributes */ - _ep->mult = comp_desc->bmAttributes & 0x3; + _ep->mult = (comp_desc->bmAttributes & 0x3) + 1; case USB_ENDPOINT_XFER_BULK: case USB_ENDPOINT_XFER_INT: _ep->maxburst = comp_desc->bMaxBurst + 1; diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 3d0d5d94a62f..0f01c04d7cbd 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -243,7 +243,7 @@ uvc_video_alloc_requests(struct uvc_video *video) req_size = video->ep->maxpacket * max_t(unsigned int, video->ep->maxburst, 1) - * (video->ep->mult + 1); + * (video->ep->mult); for (i = 0; i < UVC_NUM_REQUESTS; ++i) { video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL); -- GitLab From c7a8a0ac8fee26d3c20402da306a17bcbbbb367b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 30 Nov 2016 16:21:25 +0530 Subject: [PATCH 0059/1442] PM / OPP: Pass opp_table to dev_pm_opp_put_regulator() commit 91291d9ad92faa65a56a9a19d658d8049b78d3d4 upstream. Joonyoung Shim reported an interesting problem on his ARM octa-core Odoroid-XU3 platform. During system suspend, dev_pm_opp_put_regulator() was failing for a struct device for which dev_pm_opp_set_regulator() is called earlier. This happened because an earlier call to dev_pm_opp_of_cpumask_remove_table() function (from cpufreq-dt.c file) removed all the entries from opp_table->dev_list apart from the last CPU device in the cpumask of CPUs sharing the OPP. But both dev_pm_opp_set_regulator() and dev_pm_opp_put_regulator() routines get CPU device for the first CPU in the cpumask. And so the OPP core failed to find the OPP table for the struct device. This patch attempts to fix this problem by returning a pointer to the opp_table from dev_pm_opp_set_regulator() and using that as the parameter to dev_pm_opp_put_regulator(). This ensures that the dev_pm_opp_put_regulator() doesn't fail to find the opp table. Note that similar design problem also exists with other dev_pm_opp_put_*() APIs, but those aren't used currently by anyone and so we don't need to update them for now. Reported-by: Joonyoung Shim Signed-off-by: Stephen Boyd Signed-off-by: Viresh Kumar [ Viresh: Wrote commit log and tested on exynos 5250 ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/opp/core.c | 22 ++++++---------------- drivers/cpufreq/cpufreq-dt.c | 12 ++++++++---- include/linux/pm_opp.h | 11 ++++++----- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 4c7c6da7a989..2824d3a5e9f0 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -1316,7 +1316,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. */ -int dev_pm_opp_set_regulator(struct device *dev, const char *name) +struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name) { struct opp_table *opp_table; struct regulator *reg; @@ -1354,20 +1354,20 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name) opp_table->regulator = reg; mutex_unlock(&opp_table_lock); - return 0; + return opp_table; err: _remove_opp_table(opp_table); unlock: mutex_unlock(&opp_table_lock); - return ret; + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); /** * dev_pm_opp_put_regulator() - Releases resources blocked for regulator - * @dev: Device for which regulator was set. + * @opp_table: OPP table returned from dev_pm_opp_set_regulator(). * * Locking: The internal opp_table and opp structures are RCU protected. * Hence this function internally uses RCU updater strategy with mutex locks @@ -1375,22 +1375,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); * that this function is *NOT* called under RCU protection or in contexts where * mutex cannot be locked. */ -void dev_pm_opp_put_regulator(struct device *dev) +void dev_pm_opp_put_regulator(struct opp_table *opp_table) { - struct opp_table *opp_table; - mutex_lock(&opp_table_lock); - /* Check for existing table for 'dev' first */ - opp_table = _find_opp_table(dev); - if (IS_ERR(opp_table)) { - dev_err(dev, "Failed to find opp_table: %ld\n", - PTR_ERR(opp_table)); - goto unlock; - } - if (IS_ERR(opp_table->regulator)) { - dev_err(dev, "%s: Doesn't have regulator set\n", __func__); + pr_err("%s: Doesn't have regulator set\n", __func__); goto unlock; } diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 5c07ae05d69a..4d3ec92cbabf 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -28,6 +28,7 @@ #include "cpufreq-dt.h" struct private_data { + struct opp_table *opp_table; struct device *cpu_dev; struct thermal_cooling_device *cdev; const char *reg_name; @@ -143,6 +144,7 @@ static int resources_available(void) static int cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; + struct opp_table *opp_table = NULL; struct private_data *priv; struct device *cpu_dev; struct clk *cpu_clk; @@ -186,8 +188,9 @@ static int cpufreq_init(struct cpufreq_policy *policy) */ name = find_supply_name(cpu_dev); if (name) { - ret = dev_pm_opp_set_regulator(cpu_dev, name); - if (ret) { + opp_table = dev_pm_opp_set_regulator(cpu_dev, name); + if (IS_ERR(opp_table)) { + ret = PTR_ERR(opp_table); dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", policy->cpu, ret); goto out_put_clk; @@ -237,6 +240,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) } priv->reg_name = name; + priv->opp_table = opp_table; ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { @@ -285,7 +289,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) out_free_opp: dev_pm_opp_of_cpumask_remove_table(policy->cpus); if (name) - dev_pm_opp_put_regulator(cpu_dev); + dev_pm_opp_put_regulator(opp_table); out_put_clk: clk_put(cpu_clk); @@ -300,7 +304,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); if (priv->reg_name) - dev_pm_opp_put_regulator(priv->cpu_dev); + dev_pm_opp_put_regulator(priv->opp_table); clk_put(policy->clk); kfree(priv); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index bca26157f5b6..f6bc76501912 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -19,6 +19,7 @@ struct dev_pm_opp; struct device; +struct opp_table; enum dev_pm_opp_event { OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE, @@ -62,8 +63,8 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, void dev_pm_opp_put_supported_hw(struct device *dev); int dev_pm_opp_set_prop_name(struct device *dev, const char *name); void dev_pm_opp_put_prop_name(struct device *dev); -int dev_pm_opp_set_regulator(struct device *dev, const char *name); -void dev_pm_opp_put_regulator(struct device *dev); +struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name); +void dev_pm_opp_put_regulator(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -170,12 +171,12 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) static inline void dev_pm_opp_put_prop_name(struct device *dev) {} -static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) +static inline struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name) { - return -ENOTSUPP; + return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_put_regulator(struct device *dev) {} +static inline void dev_pm_opp_put_regulator(struct opp_table *opp_table) {} static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { -- GitLab From 7ac62bcde2d417777a40ced09d958f99ed9009c9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 1 Dec 2016 16:28:16 +0530 Subject: [PATCH 0060/1442] PM / OPP: Don't use OPP structure outside of rcu protected section commit dc39d06fcd7a4a82d72eae7b71e94e888b96d29e upstream. The OPP structure must not be used out of the rcu protected section. Cache the values to be used in separate variables instead. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Tested-by: Dave Gerlach Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/opp/core.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 2824d3a5e9f0..6441dfda489f 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -584,6 +584,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) struct clk *clk; unsigned long freq, old_freq; unsigned long u_volt, u_volt_min, u_volt_max; + unsigned long old_u_volt, old_u_volt_min, old_u_volt_max; int ret; if (unlikely(!target_freq)) { @@ -633,6 +634,14 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) return ret; } + if (IS_ERR(old_opp)) { + old_u_volt = 0; + } else { + old_u_volt = old_opp->u_volt; + old_u_volt_min = old_opp->u_volt_min; + old_u_volt_max = old_opp->u_volt_max; + } + u_volt = opp->u_volt; u_volt_min = opp->u_volt_min; u_volt_max = opp->u_volt_max; @@ -677,9 +686,10 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) __func__, old_freq); restore_voltage: /* This shouldn't harm even if the voltages weren't updated earlier */ - if (!IS_ERR(old_opp)) - _set_opp_voltage(dev, reg, old_opp->u_volt, - old_opp->u_volt_min, old_opp->u_volt_max); + if (old_u_volt) { + _set_opp_voltage(dev, reg, old_u_volt, old_u_volt_min, + old_u_volt_max); + } return ret; } -- GitLab From 67b0069a5175c14ddcf12c431bcf400ca23f9931 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Oct 2016 17:18:48 -0700 Subject: [PATCH 0061/1442] blk-mq: Do not invoke .queue_rq() for a stopped queue commit bc27c01b5c46d3bfec42c96537c7a3fae0bb2cc4 upstream. The meaning of the BLK_MQ_S_STOPPED flag is "do not call .queue_rq()". Hence modify blk_mq_make_request() such that requests are queued instead of issued if a queue has been stopped. Reported-by: Ming Lei Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f3d27a6dee09..ad459e4e8071 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1332,9 +1332,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); if (!old_rq) goto done; - if (!blk_mq_direct_issue_request(old_rq, &cookie)) - goto done; - blk_mq_insert_request(old_rq, false, true, true); + if (test_bit(BLK_MQ_S_STOPPED, &data.hctx->state) || + blk_mq_direct_issue_request(old_rq, &cookie) != 0) + blk_mq_insert_request(old_rq, false, true, true); goto done; } -- GitLab From 470b6910f7c1d492884fc4c741bd1011a8ac4205 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 23 Nov 2016 13:51:09 -0500 Subject: [PATCH 0062/1442] dm table: fix 'all_blk_mq' inconsistency when an empty table is loaded commit 6936c12cf809850180b24947271b8f068fdb15e9 upstream. An earlier DM multipath table could have been build ontop of underlying devices that were all using blk-mq. In that case, if that active multipath table is replaced with an empty DM multipath table (that reflects all paths have failed) then it is important that the 'all_blk_mq' state of the active table is transfered to the new empty DM table. Otherwise dm-rq.c:dm_old_prep_tio() will incorrectly clone a request that isn't needed by the DM multipath target when it is to issue IO to an underlying blk-mq device. Fixes: e83068a5 ("dm mpath: add optional "queue_mode" feature") Reported-by: Bart Van Assche Tested-by: Bart Van Assche Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c4b53b332607..53b817b29134 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -924,12 +924,6 @@ static int dm_table_determine_type(struct dm_table *t) BUG_ON(!request_based); /* No targets in this table */ - if (list_empty(devices) && __table_type_request_based(live_md_type)) { - /* inherit live MD type */ - t->type = live_md_type; - return 0; - } - /* * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by * having a compatible target use dm_table_set_type. @@ -948,6 +942,19 @@ static int dm_table_determine_type(struct dm_table *t) return -EINVAL; } + if (list_empty(devices)) { + int srcu_idx; + struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx); + + /* inherit live table's type and all_blk_mq */ + if (live_table) { + t->type = live_table->type; + t->all_blk_mq = live_table->all_blk_mq; + } + dm_put_live_table(t->md, srcu_idx); + return 0; + } + /* Non-request-stackable devices can't be used for request-based dm */ list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); -- GitLab From e74fb822281ecd034a381b0c3dd1b9d63caf1f25 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Dec 2016 16:56:06 -0800 Subject: [PATCH 0063/1442] dm table: an 'all_blk_mq' table must be loaded for a blk-mq DM device commit 301fc3f5efb98633115bd887655b19f42c6dfaa8 upstream. When dm_table_set_type() is used by a target to establish a DM table's type (e.g. DM_TYPE_MQ_REQUEST_BASED in the case of DM multipath) the DM core must go on to verify that the devices in the table are compatible with the established type. Fixes: e83068a5 ("dm mpath: add optional "queue_mode" feature") Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-table.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 53b817b29134..5ac239d0f787 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -981,6 +981,11 @@ static int dm_table_determine_type(struct dm_table *t) t->all_blk_mq = true; } + if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) { + DMERR("table load rejected: all devices are not blk-mq request-stackable"); + return -EINVAL; + } + return 0; } -- GitLab From 3fae2a9e994b41603dbe1a5f32503cf558cf77f7 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 14:09:27 +0000 Subject: [PATCH 0064/1442] dm flakey: return -EINVAL on interval bounds error in flakey_ctr() commit bff7e067ee518f9ed7e1cbc63e4c9e01670d0b71 upstream. Fix to return error code -EINVAL instead of 0, as is done elsewhere in this function. Fixes: e80d1c805a3b ("dm: do not override error code returned from dm_get_device()") Signed-off-by: Wei Yongjun Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-flakey.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 6a2e8dd44a1b..3643cba71351 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -200,11 +200,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!(fc->up_interval + fc->down_interval)) { ti->error = "Total (up + down) interval is zero"; + r = -EINVAL; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { ti->error = "Interval overflow"; + r = -EINVAL; goto bad; } -- GitLab From 2c017f77e13d4325d8739fc9ed7ed2c3629da845 Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Wed, 2 Nov 2016 15:02:08 +0100 Subject: [PATCH 0065/1442] dm crypt: mark key as invalid until properly loaded commit 265e9098bac02bc5e36cda21fdbad34cb5b2f48d upstream. In crypt_set_key(), if a failure occurs while replacing the old key (e.g. tfm->setkey() fails) the key must not have DM_CRYPT_KEY_VALID flag set. Otherwise, the crypto layer would have an invalid key that still has DM_CRYPT_KEY_VALID flag set. Signed-off-by: Ondrej Kozina Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a2768835d394..0aedd0ebccec 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1503,12 +1503,15 @@ static int crypt_set_key(struct crypt_config *cc, char *key) if (!cc->key_size && strcmp(key, "-")) goto out; + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - r = crypt_setkey_allcpus(cc); + if (!r) + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); out: /* Hex key string not needed after here, so wipe it. */ -- GitLab From e362c317ba76c9d397238737deef952187a21395 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 11 Nov 2016 17:05:27 -0800 Subject: [PATCH 0066/1442] dm rq: fix a race condition in rq_completed() commit d15bb3a6467e102e60d954aadda5fb19ce6fd8ec upstream. It is required to hold the queue lock when calling blk_run_queue_async() to avoid that a race between blk_run_queue_async() and blk_cleanup_queue() is triggered. Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-rq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 1d0d2adc050a..31a89c8832c0 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -226,6 +226,9 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { + struct request_queue *q = md->queue; + unsigned long flags; + atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ @@ -238,8 +241,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) * back into ->request_fn() could deadlock attempting to grab the * queue lock again. */ - if (!md->queue->mq_ops && run_queue) - blk_run_queue_async(md->queue); + if (!q->mq_ops && run_queue) { + spin_lock_irqsave(q->queue_lock, flags); + blk_run_queue_async(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } /* * dm_put() must be at the end of this function. See the comment above -- GitLab From 461f272954cf7a86ce60963bc8b95a9f123b4036 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 29 Nov 2016 22:37:30 +0100 Subject: [PATCH 0067/1442] dm raid: fix discard support regression commit 11e2968478edc07a75ee1efb45011b3033c621c2 upstream. Commit ecbfb9f118 ("dm raid: add raid level takeover support") moved the configure_discard_support() call from raid_ctr() to raid_preresume(). Enabling/disabling discard _must_ happen during table load (through the .ctr hook). Fix this regression by moving the configure_discard_support() call back to raid_ctr(). Fixes: ecbfb9f118 ("dm raid: add raid level takeover support") Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-raid.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 6d53810963f7..af2d79b52484 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2994,6 +2994,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) } } + /* Disable/enable discard support on raid set. */ + configure_discard_support(rs); + mddev_unlock(&rs->md); return 0; @@ -3580,12 +3583,6 @@ static int raid_preresume(struct dm_target *ti) if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) rs_update_sbs(rs); - /* - * Disable/enable discard support on raid set after any - * conversion, because devices can have been added - */ - configure_discard_support(rs); - /* Load the bitmap from disk unless raid0 */ r = __load_dirty_region_bitmap(rs); if (r) -- GitLab From f5dca4881fac42e2db76b1d93a3b3c0895ba49bb Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Wed, 30 Nov 2016 17:56:14 -0600 Subject: [PATCH 0068/1442] dm space map metadata: fix 'struct sm_metadata' leak on failed create commit 314c25c56c1ee5026cf99c570bdfe01847927acb upstream. In dm_sm_metadata_create() we temporarily change the dm_space_map operations from 'ops' (whose .destroy function deallocates the sm_metadata) to 'bootstrap_ops' (whose .destroy function doesn't). If dm_sm_metadata_create() fails in sm_ll_new_metadata() or sm_ll_extend(), it exits back to dm_tm_create_internal(), which calls dm_sm_destroy() with the intention of freeing the sm_metadata, but it doesn't (because the dm_space_map operations is still set to 'bootstrap_ops'). Fix this by setting the dm_space_map operations back to 'ops' if dm_sm_metadata_create() fails when it is set to 'bootstrap_ops'. Signed-off-by: Benjamin Marzinski Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-space-map-metadata.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 7e44005595c1..20557e2c60c6 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -775,17 +775,15 @@ int dm_sm_metadata_create(struct dm_space_map *sm, memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); r = sm_ll_new_metadata(&smm->ll, tm); + if (!r) { + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; + r = sm_ll_extend(&smm->ll, nr_blocks); + } + memcpy(&smm->sm, &ops, sizeof(smm->sm)); if (r) return r; - if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) - nr_blocks = DM_SM_METADATA_MAX_BLOCKS; - r = sm_ll_extend(&smm->ll, nr_blocks); - if (r) - return r; - - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - /* * Now we need to update the newly created data structures with the * allocated blocks that they were built from. -- GitLab From 41c856b329008609157199552e767a7a02d62cd1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 25 Nov 2016 16:54:06 +0100 Subject: [PATCH 0069/1442] ASoC: intel: Fix crash at suspend/resume without card registration commit 2fc995a87f2efcd803438f07bfecd35cc3d90d32 upstream. When ASoC Intel SST Medfield driver is probed but without codec / card assigned, it causes an Oops and freezes the kernel at suspend/resume, PM: Suspending system (freeze) Suspending console(s) (use no_console_suspend to debug) BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] sst_soc_prepare+0x19/0xa0 [snd_soc_sst_mfld_platform] Oops: 0000 [#1] PREEMPT SMP CPU: 0 PID: 1552 Comm: systemd-sleep Tainted: G W 4.9.0-rc6-1.g5f5c2ad-default #1 Call Trace: [] dpm_prepare+0x209/0x460 [] dpm_suspend_start+0x11/0x60 [] suspend_devices_and_enter+0xb2/0x710 [] pm_suspend+0x30e/0x390 [] state_store+0x8a/0x90 [] kobj_attr_store+0xf/0x20 [] sysfs_kf_write+0x37/0x40 [] kernfs_fop_write+0x11c/0x1b0 [] __vfs_write+0x28/0x140 [] ? apparmor_file_permission+0x18/0x20 [] ? security_file_permission+0x3b/0xc0 [] vfs_write+0xb5/0x1a0 [] SyS_write+0x46/0xa0 [] entry_SYSCALL_64_fastpath+0x1e/0xad Add proper NULL checks in the PM code of mdfld driver. Signed-off-by: Takashi Iwai Acked-by: Vinod Koul Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 25c6d87c818e..f5a8050351b5 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -771,6 +771,9 @@ static int sst_soc_prepare(struct device *dev) struct sst_data *drv = dev_get_drvdata(dev); struct snd_soc_pcm_runtime *rtd; + if (!drv->soc_card) + return 0; + /* suspend all pcms first */ snd_soc_suspend(drv->soc_card->dev); snd_soc_poweroff(drv->soc_card->dev); @@ -793,6 +796,9 @@ static void sst_soc_complete(struct device *dev) struct sst_data *drv = dev_get_drvdata(dev); struct snd_soc_pcm_runtime *rtd; + if (!drv->soc_card) + return; + /* restart SSPs */ list_for_each_entry(rtd, &drv->soc_card->rtd_list, list) { struct snd_soc_dai *dai = rtd->cpu_dai; -- GitLab From 7aa58e7ad53bd9536aa49a18ccd0778c728bf57d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 12 Dec 2016 12:54:37 -0800 Subject: [PATCH 0070/1442] cifs: Fix smbencrypt() to stop pointing a scatterlist at the stack commit 06deeec77a5a689cc94b21a8a91a76e42176685d upstream. smbencrypt() points a scatterlist to the stack, which is breaks if CONFIG_VMAP_STACK=y. Fix it by switching to crypto_cipher_encrypt_one(). The new code should be considerably faster as an added benefit. This code is nearly identical to some code that Eric Biggers suggested. Reported-by: Eric Biggers Signed-off-by: Andy Lutomirski Acked-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smbencrypt.c | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 699b7868108f..c12bffefa3c9 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -23,7 +23,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include +#include #include #include #include @@ -69,46 +69,22 @@ str_to_key(unsigned char *str, unsigned char *key) static int smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) { - int rc; unsigned char key2[8]; - struct crypto_skcipher *tfm_des; - struct scatterlist sgin, sgout; - struct skcipher_request *req; + struct crypto_cipher *tfm_des; str_to_key(key, key2); - tfm_des = crypto_alloc_skcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC); + tfm_des = crypto_alloc_cipher("des", 0, 0); if (IS_ERR(tfm_des)) { - rc = PTR_ERR(tfm_des); - cifs_dbg(VFS, "could not allocate des crypto API\n"); - goto smbhash_err; - } - - req = skcipher_request_alloc(tfm_des, GFP_KERNEL); - if (!req) { - rc = -ENOMEM; cifs_dbg(VFS, "could not allocate des crypto API\n"); - goto smbhash_free_skcipher; + return PTR_ERR(tfm_des); } - crypto_skcipher_setkey(tfm_des, key2, 8); - - sg_init_one(&sgin, in, 8); - sg_init_one(&sgout, out, 8); + crypto_cipher_setkey(tfm_des, key2, 8); + crypto_cipher_encrypt_one(tfm_des, out, in); + crypto_free_cipher(tfm_des); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &sgin, &sgout, 8, NULL); - - rc = crypto_skcipher_encrypt(req); - if (rc) - cifs_dbg(VFS, "could not encrypt crypt key rc: %d\n", rc); - - skcipher_request_free(req); - -smbhash_free_skcipher: - crypto_free_skcipher(tfm_des); -smbhash_err: - return rc; + return 0; } static int -- GitLab From 48f9526f4dcb4b132fe0dc2450835311e3b013a6 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 4 Nov 2016 11:50:31 -0700 Subject: [PATCH 0071/1442] CIFS: Fix a possible memory corruption during reconnect commit 53e0e11efe9289535b060a51d4cf37c25e0d0f2b upstream. We can not unlock/lock cifs_tcp_ses_lock while walking through ses and tcon lists because it can corrupt list iterator pointers and a tcon structure can be released if we don't hold an extra reference. Fix it by moving a reconnect process to a separate delayed work and acquiring a reference to every tcon that needs to be reconnected. Also do not send an echo request on newly established connections. Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsglob.h | 3 ++ fs/cifs/cifsproto.h | 3 ++ fs/cifs/connect.c | 34 +++++++++++++++----- fs/cifs/smb2pdu.c | 75 ++++++++++++++++++++++++++++++--------------- fs/cifs/smb2proto.h | 1 + 5 files changed, 85 insertions(+), 31 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1f17f6bd7a60..64f5d8754fb5 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -646,6 +646,8 @@ struct TCP_Server_Info { unsigned int max_read; unsigned int max_write; __u8 preauth_hash[512]; + struct delayed_work reconnect; /* reconnect workqueue job */ + struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ #endif /* CONFIG_CIFS_SMB2 */ unsigned long echo_interval; }; @@ -849,6 +851,7 @@ cap_unix(struct cifs_ses *ses) struct cifs_tcon { struct list_head tcon_list; int tc_count; + struct list_head rlist; /* reconnect list */ struct list_head openFileList; spinlock_t open_file_lock; /* protects list above */ struct cifs_ses *ses; /* pointer to session associated with */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index ced0e42ce460..cd8025a249bb 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -206,6 +206,9 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink, struct cifs_pending_open *open); extern void cifs_del_pending_open(struct cifs_pending_open *open); +extern void cifs_put_tcp_session(struct TCP_Server_Info *server, + int from_reconnect); +extern void cifs_put_tcon(struct cifs_tcon *tcon); #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) extern void cifs_dfs_release_automount_timer(void); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4547aeddd12b..893be0722643 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -52,6 +52,9 @@ #include "nterr.h" #include "rfc1002pdu.h" #include "fscache.h" +#ifdef CONFIG_CIFS_SMB2 +#include "smb2proto.h" +#endif #define CIFS_PORT 445 #define RFC1001_PORT 139 @@ -2100,8 +2103,8 @@ cifs_find_tcp_session(struct smb_vol *vol) return NULL; } -static void -cifs_put_tcp_session(struct TCP_Server_Info *server) +void +cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) { struct task_struct *task; @@ -2118,6 +2121,19 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) cancel_delayed_work_sync(&server->echo); +#ifdef CONFIG_CIFS_SMB2 + if (from_reconnect) + /* + * Avoid deadlock here: reconnect work calls + * cifs_put_tcp_session() at its end. Need to be sure + * that reconnect work does nothing with server pointer after + * that step. + */ + cancel_delayed_work(&server->reconnect); + else + cancel_delayed_work_sync(&server->reconnect); +#endif + spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); @@ -2182,6 +2198,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info) INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); +#ifdef CONFIG_CIFS_SMB2 + INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server); + mutex_init(&tcp_ses->reconnect_mutex); +#endif memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr, sizeof(tcp_ses->srcaddr)); memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr, @@ -2340,7 +2360,7 @@ cifs_put_smb_ses(struct cifs_ses *ses) spin_unlock(&cifs_tcp_ses_lock); sesInfoFree(ses); - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); } #ifdef CONFIG_KEYS @@ -2514,7 +2534,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) mutex_unlock(&ses->session_mutex); /* existing SMB ses has a server reference already */ - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); free_xid(xid); return ses; } @@ -2604,7 +2624,7 @@ cifs_find_tcon(struct cifs_ses *ses, const char *unc) return NULL; } -static void +void cifs_put_tcon(struct cifs_tcon *tcon) { unsigned int xid; @@ -3792,7 +3812,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) else if (ses) cifs_put_smb_ses(ses); else - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); bdi_destroy(&cifs_sb->bdi); } @@ -4103,7 +4123,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info); if (IS_ERR(ses)) { tcon = (struct cifs_tcon *)ses; - cifs_put_tcp_session(master_tcon->ses->server); + cifs_put_tcp_session(master_tcon->ses->server, 0); goto out; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5ca5ea4668a1..eb3ac14ed466 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1972,6 +1972,54 @@ smb2_echo_callback(struct mid_q_entry *mid) add_credits(server, credits_received, CIFS_ECHO_OP); } +void smb2_reconnect_server(struct work_struct *work) +{ + struct TCP_Server_Info *server = container_of(work, + struct TCP_Server_Info, reconnect.work); + struct cifs_ses *ses; + struct cifs_tcon *tcon, *tcon2; + struct list_head tmp_list; + int tcon_exist = false; + + /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ + mutex_lock(&server->reconnect_mutex); + + INIT_LIST_HEAD(&tmp_list); + cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->need_reconnect) { + tcon->tc_count++; + list_add_tail(&tcon->rlist, &tmp_list); + tcon_exist = true; + } + } + } + /* + * Get the reference to server struct to be sure that the last call of + * cifs_put_tcon() in the loop below won't release the server pointer. + */ + if (tcon_exist) + server->srv_count++; + + spin_unlock(&cifs_tcp_ses_lock); + + list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) { + smb2_reconnect(SMB2_ECHO, tcon); + list_del_init(&tcon->rlist); + cifs_put_tcon(tcon); + } + + cifs_dbg(FYI, "Reconnecting tcons finished\n"); + mutex_unlock(&server->reconnect_mutex); + + /* now we can safely release srv struct */ + if (tcon_exist) + cifs_put_tcp_session(server, 1); +} + int SMB2_echo(struct TCP_Server_Info *server) { @@ -1984,32 +2032,11 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); if (server->tcpStatus == CifsNeedNegotiate) { - struct list_head *tmp, *tmp2; - struct cifs_ses *ses; - struct cifs_tcon *tcon; - - cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); - spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifs_ses, smb_ses_list); - list_for_each(tmp2, &ses->tcon_list) { - tcon = list_entry(tmp2, struct cifs_tcon, - tcon_list); - /* add check for persistent handle reconnect */ - if (tcon && tcon->need_reconnect) { - spin_unlock(&cifs_tcp_ses_lock); - rc = smb2_reconnect(SMB2_ECHO, tcon); - spin_lock(&cifs_tcp_ses_lock); - } - } - } - spin_unlock(&cifs_tcp_ses_lock); + /* No need to send echo on newly established connections */ + queue_delayed_work(cifsiod_wq, &server->reconnect, 0); + return rc; } - /* if no session, renegotiate failed above */ - if (server->tcpStatus == CifsNeedNegotiate) - return -EIO; - rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) return rc; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index eb2cde2f64ba..f2d511a6971b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -96,6 +96,7 @@ extern int smb2_open_file(const unsigned int xid, extern int smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); +extern void smb2_reconnect_server(struct work_struct *work); /* * SMB2 Worker functions - most of protocol specific implementation details -- GitLab From 69d13b69e79cb76413b49a66e43a2be39f14aefe Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 29 Nov 2016 11:30:58 -0800 Subject: [PATCH 0072/1442] CIFS: Fix missing nls unload in smb2_reconnect() commit 4772c79599564bd08ee6682715a7d3516f67433f upstream. Acked-by: Sachin Prabhu Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index eb3ac14ed466..4ba3f68a1766 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -280,7 +280,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) case SMB2_CHANGE_NOTIFY: case SMB2_QUERY_INFO: case SMB2_SET_INFO: - return -EAGAIN; + rc = -EAGAIN; } unload_nls(nls_codepage); return rc; -- GitLab From 46890ffba1d62550c45f4412378daca9586ba51b Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 29 Nov 2016 11:31:23 -0800 Subject: [PATCH 0073/1442] CIFS: Fix a possible double locking of mutex during reconnect commit 96a988ffeb90dba33a71c3826086fe67c897a183 upstream. With the current code it is possible to lock a mutex twice when a subsequent reconnects are triggered. On the 1st reconnect we reconnect sessions and tcons and then persistent file handles. If the 2nd reconnect happens during the reconnecting of persistent file handles then the following sequence of calls is observed: cifs_reopen_file -> SMB2_open -> small_smb2_init -> smb2_reconnect -> cifs_reopen_persistent_file_handles -> cifs_reopen_file (again!). So, we are trying to acquire the same cfile->fh_mutex twice which is wrong. Fix this by moving reconnecting of persistent handles to the delayed work (smb2_reconnect_server) and submitting this work every time we reconnect tcon in SMB2 commands handling codepath. This can also lead to corruption of a temporary file list in cifs_reopen_persistent_file_handles() because we can recursively call this function twice. Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsglob.h | 1 + fs/cifs/file.c | 8 +++++++- fs/cifs/smb2pdu.c | 14 +++++++++----- fs/cifs/smb2pdu.h | 2 ++ 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 64f5d8754fb5..203287f86525 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -925,6 +925,7 @@ struct cifs_tcon { bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ + bool need_reopen_files:1; /* need to reopen tcon file handles */ bool use_resilient:1; /* use resilient instead of durable handles */ bool use_persistent:1; /* use persistent instead of durable handles */ #ifdef CONFIG_CIFS_SMB2 diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7f5f6176c6f1..18a1e1d6671f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -777,6 +777,11 @@ cifs_reopen_persistent_handles(struct cifs_tcon *tcon) struct list_head *tmp1; struct list_head tmp_list; + if (!tcon->use_persistent || !tcon->need_reopen_files) + return; + + tcon->need_reopen_files = false; + cifs_dbg(FYI, "Reopen persistent handles"); INIT_LIST_HEAD(&tmp_list); @@ -793,7 +798,8 @@ cifs_reopen_persistent_handles(struct cifs_tcon *tcon) list_for_each_safe(tmp, tmp1, &tmp_list) { open_file = list_entry(tmp, struct cifsFileInfo, rlist); - cifs_reopen_file(open_file, false /* do not flush */); + if (cifs_reopen_file(open_file, false /* do not flush */)) + tcon->need_reopen_files = true; list_del_init(&open_file->rlist); cifsFileInfo_put(open_file); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 4ba3f68a1766..87457227812c 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -250,16 +250,19 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } cifs_mark_open_files_invalid(tcon); + if (tcon->use_persistent) + tcon->need_reopen_files = true; rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); mutex_unlock(&tcon->ses->session_mutex); - if (tcon->use_persistent) - cifs_reopen_persistent_handles(tcon); - cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) goto out; + + if (smb2_command != SMB2_INTERNAL_CMD) + queue_delayed_work(cifsiod_wq, &server->reconnect, 0); + atomic_inc(&tconInfoReconnectCount); out: /* @@ -1990,7 +1993,7 @@ void smb2_reconnect_server(struct work_struct *work) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { - if (tcon->need_reconnect) { + if (tcon->need_reconnect || tcon->need_reopen_files) { tcon->tc_count++; list_add_tail(&tcon->rlist, &tmp_list); tcon_exist = true; @@ -2007,7 +2010,8 @@ void smb2_reconnect_server(struct work_struct *work) spin_unlock(&cifs_tcp_ses_lock); list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) { - smb2_reconnect(SMB2_ECHO, tcon); + if (!smb2_reconnect(SMB2_INTERNAL_CMD, tcon)) + cifs_reopen_persistent_handles(tcon); list_del_init(&tcon->rlist); cifs_put_tcon(tcon); } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index fd3709e8de33..dc0d141f33e2 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -80,6 +80,8 @@ #define SMB2_SET_INFO cpu_to_le16(SMB2_SET_INFO_HE) #define SMB2_OPLOCK_BREAK cpu_to_le16(SMB2_OPLOCK_BREAK_HE) +#define SMB2_INTERNAL_CMD cpu_to_le16(0xFFFF) + #define NUMBER_OF_SMB2_COMMANDS 0x0013 /* BB FIXME - analyze following length BB */ -- GitLab From 9f9d98246e5f7e70cf661be2253c75f4185b60a6 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 16 Nov 2016 15:17:15 -0800 Subject: [PATCH 0074/1442] CIFS: Decrease verbosity of ioctl call commit b0a752b5ce76bd1d8b733a53757c3263511dcb69 upstream. Reviewed-by: Aurelien Aptel Acked-by: Sachin Prabhu Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 9f51b81119f2..001528781b6b 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -189,7 +189,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) xid = get_xid(); cifs_sb = CIFS_SB(inode->i_sb); - cifs_dbg(VFS, "cifs ioctl 0x%x\n", command); + cifs_dbg(FYI, "cifs ioctl 0x%x\n", command); switch (command) { case FS_IOC_GETFLAGS: if (pSMBFile == NULL) -- GitLab From c954acc0007bf2a33a34505e21bc5d617a2fe2aa Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 29 Nov 2016 16:14:43 -0800 Subject: [PATCH 0075/1442] CIFS: Fix a possible memory corruption in push locks commit e3d240e9d505fc67f8f8735836df97a794bbd946 upstream. If maxBuf is not 0 but less than a size of SMB2 lock structure we can end up with a memory corruption. Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index f9e766f464be..b2aff0c6f22c 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -260,7 +260,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) * and check it for zero before using. */ max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; - if (!max_buf) { + if (max_buf < sizeof(struct smb2_lock_element)) { free_xid(xid); return -EINVAL; } -- GitLab From 36b08b819713e587fa43b94516302dd8732dd27a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 14 Dec 2016 15:04:04 -0800 Subject: [PATCH 0076/1442] kernel/watchdog: use nmi registers snapshot in hardlockup handler commit 4d1f0fb096aedea7bb5489af93498a82e467c480 upstream. NMI handler doesn't call set_irq_regs(), it's set only by normal IRQ. Thus get_irq_regs() returns NULL or stale registers snapshot with IP/SP pointing to the code interrupted by IRQ which was interrupted by NMI. NULL isn't a problem: in this case watchdog calls dump_stack() and prints full stack trace including NMI. But if we're stuck in IRQ handler then NMI watchlog will print stack trace without IRQ part at all. This patch uses registers snapshot passed into NMI handler as arguments: these registers point exactly to the instruction interrupted by NMI. Fixes: 55537871ef66 ("kernel/watchdog.c: perform all-CPU backtrace in case of hard lockup") Link: http://lkml.kernel.org/r/146771764784.86724.6006627197118544150.stgit@buzz Signed-off-by: Konstantin Khlebnikov Cc: Jiri Kosina Cc: Ulrich Obergfell Cc: Aaron Tomlin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/watchdog.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f280ec..6d1020c03d41 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -344,7 +344,6 @@ static void watchdog_overflow_callback(struct perf_event *event, */ if (is_hardlockup()) { int this_cpu = smp_processor_id(); - struct pt_regs *regs = get_irq_regs(); /* only print hardlockups once */ if (__this_cpu_read(hard_watchdog_warn) == true) -- GitLab From 2eccf0e0bcb1b164b93d83b4578a1f39ace3fff6 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 8 Nov 2016 17:55:52 +0200 Subject: [PATCH 0077/1442] watchdog: mei_wdt: request stop on reboot to prevent false positive event commit 9eff1140a82db8c5520f76e51c21827b4af670b3 upstream. Systemd on reboot enables shutdown watchdog that leaves the watchdog device open to ensure that even if power down process get stuck the platform reboots nonetheless. The iamt_wdt is an alarm-only watchdog and can't reboot system, but the FW will generate an alarm event reboot was completed in time, as the watchdog is not automatically disabled during power cycle. So we should request stop watchdog on reboot to eliminate wrong alarm from the FW. Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/mei_wdt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c index 630bd189f167..2a9d5cdedea2 100644 --- a/drivers/watchdog/mei_wdt.c +++ b/drivers/watchdog/mei_wdt.c @@ -389,6 +389,8 @@ static int mei_wdt_register(struct mei_wdt *wdt) wdt->wdd.max_timeout = MEI_WDT_MAX_TIMEOUT; watchdog_set_drvdata(&wdt->wdd, wdt); + watchdog_stop_on_reboot(&wdt->wdd); + ret = watchdog_register_device(&wdt->wdd); if (ret) { dev_err(dev, "unable to register watchdog device = %d.\n", ret); -- GitLab From 9b78d69054243a0d047ebccdd64cb8a2462e174e Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 14 Nov 2016 02:11:16 +0100 Subject: [PATCH 0078/1442] watchdog: qcom: fix kernel panic due to external abort on non-linefetch commit f06f35c66fdbd5ac38901a3305ce763a0cd59375 upstream. This patch fixes a off-by-one in the "watchdog: qcom: add option for standalone watchdog not in timer block" patch that causes the following panic on boot: > Unhandled fault: external abort on non-linefetch (0x1008) at 0xc8874002 > pgd = c0204000 > [c8874002] *pgd=87806811, *pte=0b017653, *ppte=0b017453 > Internal error: : 1008 [#1] SMP ARM > CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.8.6 #0 > Hardware name: Generic DT based system > PC is at 0xc02222f4 > LR is at 0x1 > pc : [] lr : [<00000001>] psr: 00000113 > sp : c782fc98 ip : 00000003 fp : 00000000 > r10: 00000004 r9 : c782e000 r8 : c04ab98c > r7 : 00000001 r6 : c8874002 r5 : c782fe00 r4 : 00000002 > r3 : 00000000 r2 : c782fe00 r1 : 00100000 r0 : c8874002 > Flags: nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none > Control: 10c5387d Table: 8020406a DAC: 00000051 > Process swapper/0 (pid: 1, stack limit = 0xc782e210) > Stack: (0xc782fc98 to 0xc7830000) > [...] The WDT_STS (status) needs to be translated via wdt_addr as well. fixes: f0d9d0f4b44a ("watchdog: qcom: add option for standalone watchdog not in timer block") Signed-off-by: Christian Lamparter Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/qcom-wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index 5796b5d1b3f2..4f47b5e90956 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -209,7 +209,7 @@ static int qcom_wdt_probe(struct platform_device *pdev) wdt->wdd.parent = &pdev->dev; wdt->layout = regs; - if (readl(wdt->base + WDT_STS) & 1) + if (readl(wdt_addr(wdt, WDT_STS)) & 1) wdt->wdd.bootstatus = WDIOF_CARDRESET; /* -- GitLab From f726f4f411f9094a3901fb35985f070c0fdec5b2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 14 Dec 2016 15:05:49 -0800 Subject: [PATCH 0079/1442] kernel/debug/debug_core.c: more properly delay for secondary CPUs commit 2d13bb6494c807bcf3f78af0e96c0b8615a94385 upstream. We've got a delay loop waiting for secondary CPUs. That loop uses loops_per_jiffy. However, loops_per_jiffy doesn't actually mean how many tight loops make up a jiffy on all architectures. It is quite common to see things like this in the boot log: Calibrating delay loop (skipped), value calculated using timer frequency.. 48.00 BogoMIPS (lpj=24000) In my case I was seeing lots of cases where other CPUs timed out entering the debugger only to print their stack crawls shortly after the kdb> prompt was written. Elsewhere in kgdb we already use udelay(), so that should be safe enough to use to implement our timeout. We'll delay 1 ms for 1000 times, which should give us a full second of delay (just like the old code wanted) but allow us to notice that we're done every 1 ms. [akpm@linux-foundation.org: simplifications, per Daniel] Link: http://lkml.kernel.org/r/1477091361-2039-1-git-send-email-dianders@chromium.org Signed-off-by: Douglas Anderson Reviewed-by: Daniel Thompson Cc: Jason Wessel Cc: Brian Norris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/debug/debug_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 0874e2edd275..79517e5549f1 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -598,11 +598,11 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, /* * Wait for the other CPUs to be notified and be waiting for us: */ - time_left = loops_per_jiffy * HZ; + time_left = MSEC_PER_SEC; while (kgdb_do_roundup && --time_left && (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) != online_cpus) - cpu_relax(); + udelay(1000); if (!time_left) pr_crit("Timed out waiting for secondary CPUs.\n"); -- GitLab From b7bbf06c21aac5c91d3c7f73db291648232dfb39 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 26 Oct 2016 16:28:45 -0600 Subject: [PATCH 0080/1442] tpm xen: Remove bogus tpm_chip_unregister commit 1f0f30e404b3d8f4597a2d9b77fba55452f8fd0e upstream. tpm_chip_unregister can only be called after tpm_chip_register. devm manages the allocation so no unwind is needed here. Fixes: afb5abc262e96 ("tpm: two-phase chip management functions") Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/xen-tpmfront.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 62028f483bba..a2ab00831df1 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -307,7 +307,6 @@ static int tpmfront_probe(struct xenbus_device *dev, rv = setup_ring(dev, priv); if (rv) { chip = dev_get_drvdata(&dev->dev); - tpm_chip_unregister(chip); ring_free(priv); return rv; } -- GitLab From 45394bf3e11ea50a704f0c7cb97bfe87af3559f0 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 21 Nov 2016 09:56:06 -0500 Subject: [PATCH 0081/1442] xen/gntdev: Use VM_MIXEDMAP instead of VM_IO to avoid NUMA balancing commit 30faaafdfa0c754c91bac60f216c9f34a2bfdf7e upstream. Commit 9c17d96500f7 ("xen/gntdev: Grant maps should not be subject to NUMA balancing") set VM_IO flag to prevent grant maps from being subjected to NUMA balancing. It was discovered recently that this flag causes get_user_pages() to always fail with -EFAULT. check_vma_flags __get_user_pages __get_user_pages_locked __get_user_pages_unlocked get_user_pages_fast iov_iter_get_pages dio_refill_pages do_direct_IO do_blockdev_direct_IO do_blockdev_direct_IO ext4_direct_IO_read generic_file_read_iter aio_run_iocb (which can happen if guest's vdisk has direct-io-safe option). To avoid this let's use VM_MIXEDMAP flag instead --- it prevents NUMA balancing just as VM_IO does and has no effect on check_vma_flags(). Reported-by: Olaf Hering Suggested-by: Hugh Dickins Signed-off-by: Boris Ostrovsky Acked-by: Hugh Dickins Tested-by: Olaf Hering Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/gntdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index bb952121ea94..2ef2b61b69df 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1007,7 +1007,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; -- GitLab From e67053ad4840676f34215823770ba6d95037d138 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 7 Dec 2016 12:24:40 +0000 Subject: [PATCH 0082/1442] arm/xen: Use alloc_percpu rather than __alloc_percpu commit 24d5373dda7c00a438d26016bce140299fae675e upstream. The function xen_guest_init is using __alloc_percpu with an alignment which are not power of two. However, the percpu allocator never supported alignments which are not power of two and has always behaved incorectly in thise case. Commit 3ca45a4 "percpu: ensure requested alignment is power of two" introduced a check which trigger a warning [1] when booting linux-next on Xen. But in reality this bug was always present. This can be fixed by replacing the call to __alloc_percpu with alloc_percpu. The latter will use an alignment which are a power of two. [1] [ 0.023921] illegal size (48) or align (48) for percpu allocation [ 0.024167] ------------[ cut here ]------------ [ 0.024344] WARNING: CPU: 0 PID: 1 at linux/mm/percpu.c:892 pcpu_alloc+0x88/0x6c0 [ 0.024584] Modules linked in: [ 0.024708] [ 0.024804] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-rc7-next-20161128 #473 [ 0.025012] Hardware name: Foundation-v8A (DT) [ 0.025162] task: ffff80003d870000 task.stack: ffff80003d844000 [ 0.025351] PC is at pcpu_alloc+0x88/0x6c0 [ 0.025490] LR is at pcpu_alloc+0x88/0x6c0 [ 0.025624] pc : [] lr : [] pstate: 60000045 [ 0.025830] sp : ffff80003d847cd0 [ 0.025946] x29: ffff80003d847cd0 x28: 0000000000000000 [ 0.026147] x27: 0000000000000000 x26: 0000000000000000 [ 0.026348] x25: 0000000000000000 x24: 0000000000000000 [ 0.026549] x23: 0000000000000000 x22: 00000000024000c0 [ 0.026752] x21: ffff000008e97000 x20: 0000000000000000 [ 0.026953] x19: 0000000000000030 x18: 0000000000000010 [ 0.027155] x17: 0000000000000a3f x16: 00000000deadbeef [ 0.027357] x15: 0000000000000006 x14: ffff000088f79c3f [ 0.027573] x13: ffff000008f79c4d x12: 0000000000000041 [ 0.027782] x11: 0000000000000006 x10: 0000000000000042 [ 0.027995] x9 : ffff80003d847a40 x8 : 6f697461636f6c6c [ 0.028208] x7 : 6120757063726570 x6 : ffff000008f79c84 [ 0.028419] x5 : 0000000000000005 x4 : 0000000000000000 [ 0.028628] x3 : 0000000000000000 x2 : 000000000000017f [ 0.028840] x1 : ffff80003d870000 x0 : 0000000000000035 [ 0.029056] [ 0.029152] ---[ end trace 0000000000000000 ]--- [ 0.029297] Call trace: [ 0.029403] Exception stack(0xffff80003d847b00 to 0xffff80003d847c30) [ 0.029621] 7b00: 0000000000000030 0001000000000000 ffff80003d847cd0 ffff00000818e678 [ 0.029901] 7b20: 0000000000000002 0000000000000004 ffff000008f7c060 0000000000000035 [ 0.030153] 7b40: ffff000008f79000 ffff000008c4cd88 ffff80003d847bf0 ffff000008101778 [ 0.030402] 7b60: 0000000000000030 0000000000000000 ffff000008e97000 00000000024000c0 [ 0.030647] 7b80: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 0.030895] 7ba0: 0000000000000035 ffff80003d870000 000000000000017f 0000000000000000 [ 0.031144] 7bc0: 0000000000000000 0000000000000005 ffff000008f79c84 6120757063726570 [ 0.031394] 7be0: 6f697461636f6c6c ffff80003d847a40 0000000000000042 0000000000000006 [ 0.031643] 7c00: 0000000000000041 ffff000008f79c4d ffff000088f79c3f 0000000000000006 [ 0.031877] 7c20: 00000000deadbeef 0000000000000a3f [ 0.032051] [] pcpu_alloc+0x88/0x6c0 [ 0.032229] [] __alloc_percpu+0x18/0x20 [ 0.032409] [] xen_guest_init+0x174/0x2f4 [ 0.032591] [] do_one_initcall+0x38/0x130 [ 0.032783] [] kernel_init_freeable+0xe0/0x248 [ 0.032995] [] kernel_init+0x10/0x100 [ 0.033172] [] ret_from_fork+0x10/0x50 Reported-by: Wei Chen Link: https://lkml.org/lkml/2016/11/28/669 Signed-off-by: Julien Grall Signed-off-by: Stefano Stabellini Reviewed-by: Stefano Stabellini Signed-off-by: Greg Kroah-Hartman --- arch/arm/xen/enlighten.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index f193414d0f6f..4986dc0c1dff 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -372,8 +372,7 @@ static int __init xen_guest_init(void) * for secondary CPUs as they are brought up. * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. */ - xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info), - sizeof(struct vcpu_info)); + xen_vcpu_info = alloc_percpu(struct vcpu_info); if (xen_vcpu_info == NULL) return -ENOMEM; -- GitLab From c11a13d6f5274167a7c44e08722162ae3acce437 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 8 Nov 2016 12:55:18 +1100 Subject: [PATCH 0083/1442] xfs: fix up xfs_swap_extent_forks inline extent handling commit 4dfce57db6354603641132fac3c887614e3ebe81 upstream. There have been several reports over the years of NULL pointer dereferences in xfs_trans_log_inode during xfs_fsr processes, when the process is doing an fput and tearing down extents on the temporary inode, something like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 PID: 29439 TASK: ffff880550584fa0 CPU: 6 COMMAND: "xfs_fsr" [exception RIP: xfs_trans_log_inode+0x10] #9 [ffff8800a57bbbe0] xfs_bunmapi at ffffffffa037398e [xfs] #10 [ffff8800a57bbce8] xfs_itruncate_extents at ffffffffa0391b29 [xfs] #11 [ffff8800a57bbd88] xfs_inactive_truncate at ffffffffa0391d0c [xfs] #12 [ffff8800a57bbdb8] xfs_inactive at ffffffffa0392508 [xfs] #13 [ffff8800a57bbdd8] xfs_fs_evict_inode at ffffffffa035907e [xfs] #14 [ffff8800a57bbe00] evict at ffffffff811e1b67 #15 [ffff8800a57bbe28] iput at ffffffff811e23a5 #16 [ffff8800a57bbe58] dentry_kill at ffffffff811dcfc8 #17 [ffff8800a57bbe88] dput at ffffffff811dd06c #18 [ffff8800a57bbea8] __fput at ffffffff811c823b #19 [ffff8800a57bbef0] ____fput at ffffffff811c846e #20 [ffff8800a57bbf00] task_work_run at ffffffff81093b27 #21 [ffff8800a57bbf30] do_notify_resume at ffffffff81013b0c #22 [ffff8800a57bbf50] int_signal at ffffffff8161405d As it turns out, this is because the i_itemp pointer, along with the d_ops pointer, has been overwritten with zeros when we tear down the extents during truncate. When the in-core inode fork on the temporary inode used by xfs_fsr was originally set up during the extent swap, we mistakenly looked at di_nextents to determine whether all extents fit inline, but this misses extents generated by speculative preallocation; we should be using if_bytes instead. This mistake corrupts the in-memory inode, and code in xfs_iext_remove_inline eventually gets bad inputs, causing it to memmove and memset incorrect ranges; this became apparent because the two values in ifp->if_u2.if_inline_ext[1] contained what should have been in d_ops and i_itemp; they were memmoved due to incorrect array indexing and then the original locations were zeroed with memset, again due to an array overrun. Fix this by properly using i_df.if_bytes to determine the number of extents, not di_nextents. Thanks to dchinner for looking at this with me and spotting the root cause. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_util.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 552465e011ec..47074e0c33f3 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1792,6 +1792,7 @@ xfs_swap_extent_forks( struct xfs_ifork tempifp, *ifp, *tifp; int aforkblks = 0; int taforkblks = 0; + xfs_extnum_t nextents; __uint64_t tmp; int error; @@ -1881,7 +1882,8 @@ xfs_swap_extent_forks( * pointer. Otherwise it's already NULL or * pointing to the extent. */ - if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { + nextents = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } @@ -1900,7 +1902,8 @@ xfs_swap_extent_forks( * pointer. Otherwise it's already NULL or * pointing to the extent. */ - if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { + nextents = tip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } -- GitLab From e12096297ea55330cabe576596ad3f0b0fa4b8f4 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 5 Dec 2016 12:31:06 +1100 Subject: [PATCH 0084/1442] xfs: set AGI buffer type in xlog_recover_clear_agi_bucket commit 6b10b23ca94451fae153a5cc8d62fd721bec2019 upstream. xlog_recover_clear_agi_bucket didn't set the type to XFS_BLFT_AGI_BUF, so we got a warning during log replay (or an ASSERT on a debug build). XFS (md0): Unknown buffer type 0! XFS (md0): _xfs_buf_ioapply: no ops on block 0xaea8802/0x1 Fix this, as was done in f19b872b for 2 other locations with the same problem. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9b3d7c76915d..2d91f5ab7538 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4929,6 +4929,7 @@ xlog_recover_clear_agi_bucket( agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket); + xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); -- GitLab From c728f2b5edf24bac22b0549e3a5b3622c35b9eba Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Thu, 24 Nov 2016 02:27:03 +0100 Subject: [PATCH 0085/1442] builddeb: fix cross-building to arm64 producing host-arch debs commit 152b695d74376bfe55cd2a6265ccc75b0d39dd19 upstream. Both Debian and kernel archs are "arm64" but UTS_MACHINE and gcc say "aarch64". Recognizing just the latter should be enough but let's accept both in case something regresses again or an user sets UTS_MACHINE=arm64. Regressed in cfa88c7: arm64: Set UTS_MACHINE in the Makefile. Signed-off-by: Adam Borowski Acked-by: Riku Voipio Signed-off-by: Michal Marek Signed-off-by: Greg Kroah-Hartman --- scripts/package/builddeb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 8ea9fd2b6573..3c575cd07888 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -51,7 +51,7 @@ set_debarch() { debarch=hppa ;; mips*) debarch=mips$(grep -q CPU_LITTLE_ENDIAN=y $KCONFIG_CONFIG && echo el || true) ;; - arm64) + aarch64|arm64) debarch=arm64 ;; arm*) if grep -q CONFIG_AEABI=y $KCONFIG_CONFIG; then -- GitLab From 705df55bd0cf3530cc7e2b517f77d14585d3d24f Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sun, 11 Dec 2016 02:09:18 +0100 Subject: [PATCH 0086/1442] x86/kbuild: enable modversions for symbols exported from asm commit 334bb773876403eae3457d81be0b8ea70f8e4ccc upstream. Commit 4efca4ed ("kbuild: modversions for EXPORT_SYMBOL() for asm") adds modversion support for symbols exported from asm files. Architectures must include C-style declarations for those symbols in asm/asm-prototypes.h in order for them to be versioned. Add these declarations for x86, and an architecture-independent file that can be used for common symbols. With f27c2f6 reverting 8ab2ae6 ("default exported asm symbols to zero") we produce a scary warning on x86, this commit fixes that. Signed-off-by: Adam Borowski Tested-by: Kalle Valo Acked-by: Nicholas Piggin Tested-by: Peter Wu Tested-by: Oliver Hartkopp Signed-off-by: Michal Marek Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/asm-prototypes.h | 16 ++++++++++++++++ include/asm-generic/asm-prototypes.h | 7 +++++++ 2 files changed, 23 insertions(+) create mode 100644 arch/x86/include/asm/asm-prototypes.h create mode 100644 include/asm-generic/asm-prototypes.h diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..44b8762fa0c7 --- /dev/null +++ b/arch/x86/include/asm/asm-prototypes.h @@ -0,0 +1,16 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#ifndef CONFIG_X86_CMPXCHG64 +extern void cmpxchg8b_emu(void); +#endif diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h new file mode 100644 index 000000000000..df13637e4017 --- /dev/null +++ b/include/asm-generic/asm-prototypes.h @@ -0,0 +1,7 @@ +#include +extern void *__memset(void *, int, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t); +extern void *__memmove(void *, const void *, __kernel_size_t); +extern void *memset(void *, int, __kernel_size_t); +extern void *memcpy(void *, const void *, __kernel_size_t); +extern void *memmove(void *, const void *, __kernel_size_t); -- GitLab From 6437abdb624edc4e68859e66768933d2b5eb9f09 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 Jan 2017 10:40:28 +0100 Subject: [PATCH 0087/1442] Linux 4.9.1 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b1037774e8e8..ab3cd5128889 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 0 +SUBLEVEL = 1 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From b8425f41346610bd2137eee2c374227ee532e497 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 5 Nov 2016 14:08:57 -0500 Subject: [PATCH 0088/1442] ssb: Fix error routine when fallback SPROM fails commit 8052d7245b6089992343c80b38b14dbbd8354651 upstream. When there is a CRC error in the SPROM read from the device, the code attempts to handle a fallback SPROM. When this also fails, the driver returns zero rather than an error code. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/ssb/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 0f28c08fcb3c..77b551da5728 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -909,6 +909,7 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus, if (err) { ssb_warn("WARNING: Using fallback SPROM failed (err %d)\n", err); + goto out_free; } else { ssb_dbg("Using SPROM revision %d provided by platform\n", sprom->revision); -- GitLab From ea23fca0c60045afbbeceef1926893d905eb1b36 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Wed, 30 Nov 2016 10:59:29 +0530 Subject: [PATCH 0089/1442] ath10k: fix soft lockup during firmware crash/hw-restart commit c2cac2f74ab4bcf0db0dcf3a612f1e5b52d145c8 upstream. During firmware crash (or) user requested manual restart the system gets into a soft lock up state because of the below root cause. During user requested hardware restart / firmware crash the system goes into a soft lockup state as 'napi_synchronize' is called after 'napi_disable' (which sets 'NAPI_STATE_SCHED' bit) and it sleeps into infinite loop as it waits for 'NAPI_STATE_SCHED' to be cleared. This condition is hit because 'ath10k_hif_stop' is called twice as below (resulting in calling 'napi_synchronize' after 'napi_disable') 'ath10k_core_restart' -> 'ath10k_hif_stop' (ATH10K_STATE_ON) -> -> 'ieee80211_restart_hw' -> 'ath10k_start' -> 'ath10k_halt' -> 'ath10k_core_stop' -> 'ath10k_hif_stop' (ATH10K_STATE_RESTARTING) Fix this by calling 'ath10k_halt' in ath10k_core_restart itself as it makes more sense before informing mac80211 to restart h/w Also remove 'ath10k_halt' in ath10k_start for the state of 'restarting' Fixes: 3c97f5de1f28 ("ath10k: implement NAPI support") Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/core.c | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 21ae8d663e67..0c4532227f25 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1534,7 +1534,7 @@ static void ath10k_core_restart(struct work_struct *work) switch (ar->state) { case ATH10K_STATE_ON: ar->state = ATH10K_STATE_RESTARTING; - ath10k_hif_stop(ar); + ath10k_halt(ar); ath10k_scan_finish(ar); ieee80211_restart_hw(ar->hw); break; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 76297d69f1ed..90eeb1c82e8b 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4449,7 +4449,6 @@ static int ath10k_start(struct ieee80211_hw *hw) ar->state = ATH10K_STATE_ON; break; case ATH10K_STATE_RESTARTING: - ath10k_halt(ar); ar->state = ATH10K_STATE_RESTARTED; break; case ATH10K_STATE_ON: -- GitLab From 98068574928f499b30f136ff57ef9a03cc575a36 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 26 Nov 2016 14:43:35 -0600 Subject: [PATCH 0090/1442] rtlwifi: Fix enter/exit power_save commit ba9f93f82abafe2552eac942ebb11c2df4f8dd7f upstream. In commit a5ffbe0a1993 ("rtlwifi: Fix scheduling while atomic bug") and commit a269913c52ad ("rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue"), an error was introduced in the power-save routines due to the fact that leaving PS was delayed by the use of a work queue. This problem is fixed by detecting if the enter or leave routines are in interrupt mode. If so, the workqueue is used to place the request. If in normal mode, the enter or leave routines are called directly. Fixes: a269913c52ad ("rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue") Reported-by: Ping-Ke Shih Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/base.c | 8 ++--- drivers/net/wireless/realtek/rtlwifi/core.c | 9 ++---- drivers/net/wireless/realtek/rtlwifi/pci.c | 14 +++----- drivers/net/wireless/realtek/rtlwifi/ps.c | 36 ++++++++++++++++----- 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 264466f59c57..4ac928bf1f8e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1303,12 +1303,13 @@ EXPORT_SYMBOL_GPL(rtl_action_proc); static void setup_arp_tx(struct rtl_priv *rtlpriv, struct rtl_ps_ctl *ppsc) { + struct ieee80211_hw *hw = rtlpriv->hw; + rtlpriv->ra.is_special_data = true; if (rtlpriv->cfg->ops->get_btc_status()) rtlpriv->btcoexist.btc_ops->btc_special_packet_notify( rtlpriv, 1); - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); ppsc->last_delaylps_stamp_jiffies = jiffies; } @@ -1381,8 +1382,7 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx, if (is_tx) { rtlpriv->ra.is_special_data = true; - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); ppsc->last_delaylps_stamp_jiffies = jiffies; } diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 8e7f23c11680..4da4e458142c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1150,10 +1150,8 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, } else { mstatus = RT_MEDIA_DISCONNECT; - if (mac->link_state == MAC80211_LINKED) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + if (mac->link_state == MAC80211_LINKED) + rtl_lps_leave(hw); if (ppsc->p2p_ps_info.p2p_ps_mode > P2P_PS_NONE) rtl_p2p_ps_cmd(hw, P2P_PS_DISABLE); mac->link_state = MAC80211_NOLINK; @@ -1431,8 +1429,7 @@ static void rtl_op_sw_scan_start(struct ieee80211_hw *hw, } if (mac->link_state == MAC80211_LINKED) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); mac->link_state = MAC80211_LINKED_SCANNING; } else { rtl_ips_nic_on(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 0dfa9eac3926..5be4fc96002d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -663,11 +663,9 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) } if (((rtlpriv->link_info.num_rx_inperiod + - rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + rtlpriv->link_info.num_tx_inperiod) > 8) || + (rtlpriv->link_info.num_rx_inperiod > 2)) + rtl_lps_leave(hw); } static int _rtl_pci_init_one_rxdesc(struct ieee80211_hw *hw, @@ -918,10 +916,8 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) } if (((rtlpriv->link_info.num_rx_inperiod + rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + (rtlpriv->link_info.num_rx_inperiod > 2)) + rtl_lps_leave(hw); skb = new_skb; no_new: if (rtlpriv->use_new_trx_flow) { diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 18d979affc18..d0ffc4d508cf 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -407,8 +407,8 @@ void rtl_lps_set_psmode(struct ieee80211_hw *hw, u8 rt_psmode) } } -/*Enter the leisure power save mode.*/ -void rtl_lps_enter(struct ieee80211_hw *hw) +/* Interrupt safe routine to enter the leisure power save mode.*/ +static void rtl_lps_enter_core(struct ieee80211_hw *hw) { struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -444,10 +444,9 @@ void rtl_lps_enter(struct ieee80211_hw *hw) spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } -EXPORT_SYMBOL(rtl_lps_enter); -/*Leave the leisure power save mode.*/ -void rtl_lps_leave(struct ieee80211_hw *hw) +/* Interrupt safe routine to leave the leisure power save mode.*/ +static void rtl_lps_leave_core(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -477,7 +476,6 @@ void rtl_lps_leave(struct ieee80211_hw *hw) } spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } -EXPORT_SYMBOL(rtl_lps_leave); /* For sw LPS*/ void rtl_swlps_beacon(struct ieee80211_hw *hw, void *data, unsigned int len) @@ -670,12 +668,34 @@ void rtl_lps_change_work_callback(struct work_struct *work) struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->enter_ps) - rtl_lps_enter(hw); + rtl_lps_enter_core(hw); else - rtl_lps_leave(hw); + rtl_lps_leave_core(hw); } EXPORT_SYMBOL_GPL(rtl_lps_change_work_callback); +void rtl_lps_enter(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (!in_interrupt()) + return rtl_lps_enter_core(hw); + rtlpriv->enter_ps = true; + schedule_work(&rtlpriv->works.lps_change_work); +} +EXPORT_SYMBOL_GPL(rtl_lps_enter); + +void rtl_lps_leave(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (!in_interrupt()) + return rtl_lps_leave_core(hw); + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); +} +EXPORT_SYMBOL_GPL(rtl_lps_leave); + void rtl_swlps_wq_callback(void *data) { struct rtl_works *rtlworks = container_of_dwork_rtl(data, -- GitLab From 54fa190d0a548f7e403aa5d73e830b8aeecbf27b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 8 Dec 2016 16:14:17 -0800 Subject: [PATCH 0091/1442] perf/x86: Fix exclusion of BTS and LBR for Goldmont commit b0c1ef52959582144bbea9a2b37db7f4c9e399f7 upstream. An earlier patch allowed enabling PT and LBR at the same time on Goldmont. However it also allowed enabling BTS and LBR at the same time, which is still not supported. Fix this by bypassing the check only for PT. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: alexander.shishkin@intel.com Cc: kan.liang@intel.com Fixes: ccbebba4c6bf ("perf/x86/intel/pt: Bypass PT vs. LBR exclusivity if the core supports it") Link: http://lkml.kernel.org/r/20161209001417.4713-1-andi@firstfloor.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 8 ++++++-- arch/x86/events/perf_event.h | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6e395c996900..7fe88bb57e36 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -365,7 +365,11 @@ int x86_add_exclusive(unsigned int what) { int i; - if (x86_pmu.lbr_pt_coexist) + /* + * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS. + * LBR and BTS are still mutually exclusive. + */ + if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return 0; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { @@ -388,7 +392,7 @@ int x86_add_exclusive(unsigned int what) void x86_del_exclusive(unsigned int what) { - if (x86_pmu.lbr_pt_coexist) + if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a77ee026643d..bcbb1d2ae10b 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -604,7 +604,7 @@ struct x86_pmu { u64 lbr_sel_mask; /* LBR_SELECT valid bits */ const int *lbr_sel_map; /* lbr_select mappings */ bool lbr_double_abort; /* duplicated lbr aborts */ - bool lbr_pt_coexist; /* LBR may coexist with PT */ + bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ /* * Intel PT/LBR/BTS are exclusive -- GitLab From ba12b35fbd69020d1fad79f0cead0fbd9ea4822a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 22 Nov 2016 14:10:50 +0530 Subject: [PATCH 0092/1442] perf annotate: Don't throw error for zero length symbols commit edee44be59190bf22d5c6e521f3852b7ff16862f upstream. 'perf report --tui' exits with error when it finds a sample of zero length symbol (i.e. addr == sym->start == sym->end). Actually these are valid samples. Don't exit TUI and show report with such symbols. Reported-and-Tested-by: Anton Blanchard Link: https://lkml.org/lkml/2016/10/8/189 Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Benjamin Herrenschmidt Cc: Chris Riyder Cc: linuxppc-dev@lists.ozlabs.org Cc: Masami Hiramatsu Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1479804050-5028-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index aeb5a441bd74..430d039d0079 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -593,7 +593,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - if (addr < sym->start || addr >= sym->end) { + if ((addr < sym->start || addr >= sym->end) && + (addr != sym->end || sym->start != sym->end)) { pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, __LINE__, sym->name, sym->start, addr, sym->end); return -ERANGE; -- GitLab From 19d55b9f32b6888411c6d09090537dd1ddbf8025 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Dec 2016 11:02:08 +0100 Subject: [PATCH 0093/1442] perf/x86/intel/cstate: Prevent hotplug callback leak commit 834fcd298003c10ce450e66960c78893cb1cc4b5 upstream. If the pmu registration fails the registered hotplug callbacks are not removed. Wrong in any case, but fatal in case of a modular driver. Replace the nonsensical state names with proper ones while at it. Fixes: 77c34ef1c319 ("perf/x86/intel/cstate: Convert Intel CSTATE to hotplug state machine") Signed-off-by: Thomas Gleixner Cc: Sebastian Siewior Cc: Peter Zijlstra Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/cstate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index da51e5a3e2ff..fec8a461bdef 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -594,6 +594,9 @@ static int __init cstate_probe(const struct cstate_model *cm) static inline void cstate_cleanup(void) { + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); + if (has_cstate_core) perf_pmu_unregister(&cstate_core_pmu); @@ -606,16 +609,16 @@ static int __init cstate_init(void) int err; cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, - "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init, - NULL); + "perf/x86/cstate:starting", cstate_cpu_init, NULL); cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, - "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit); + "perf/x86/cstate:online", NULL, cstate_cpu_exit); if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); if (err) { has_cstate_core = false; pr_info("Failed to register cstate core pmu\n"); + cstate_cleanup(); return err; } } @@ -629,8 +632,7 @@ static int __init cstate_init(void) return err; } } - - return err; + return 0; } static int __init cstate_pmu_init(void) @@ -655,8 +657,6 @@ module_init(cstate_pmu_init); static void __exit cstate_pmu_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); cstate_cleanup(); } module_exit(cstate_pmu_exit); -- GitLab From 3f23f7f21ac5a5e7b0d780d595c70f4242099485 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 29 Nov 2016 18:59:02 -0500 Subject: [PATCH 0094/1442] rtl8xxxu: Work around issue with 8192eu and 8723bu devices not reconnecting commit c59f13bbead475096bdfebc7ef59c12e180858de upstream. The H2C MEDIA_STATUS_RPT command for some reason causes 8192eu and 8723bu devices not being able to reconnect. Reported-by: Barry Day Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index a5e6ec2152bf..82d949ede294 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -4372,6 +4372,13 @@ void rtl8xxxu_gen1_report_connect(struct rtl8xxxu_priv *priv, void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv, u8 macid, bool connect) { +#ifdef RTL8XXXU_GEN2_REPORT_CONNECT + /* + * Barry Day reports this causes issues with 8192eu and 8723bu + * devices reconnecting. The reason for this is unclear, but + * until it is better understood, leave the code in place but + * disabled, so it is not lost. + */ struct h2c_cmd h2c; memset(&h2c, 0, sizeof(struct h2c_cmd)); @@ -4383,6 +4390,7 @@ void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv, h2c.media_status_rpt.parm &= ~BIT(0); rtl8xxxu_gen2_h2c_cmd(priv, &h2c, sizeof(h2c.media_status_rpt)); +#endif } void rtl8xxxu_gen1_init_aggregation(struct rtl8xxxu_priv *priv) -- GitLab From 1976c7689a58405e076d7443e4f9f4805d86d935 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Dec 2016 17:22:09 +0100 Subject: [PATCH 0095/1442] cfg80211/mac80211: fix BSS leaks when abandoning assoc attempts commit e6f462df9acd2a3295e5d34eb29e2823220cf129 upstream. When mac80211 abandons an association attempt, it may free all the data structures, but inform cfg80211 and userspace about it only by sending the deauth frame it received, in which case cfg80211 has no link to the BSS struct that was used and will not cfg80211_unhold_bss() it. Fix this by providing a way to inform cfg80211 of this with the BSS entry passed, so that it can clean up properly, and use this ability in the appropriate places in mac80211. This isn't ideal: some code is more or less duplicated and tracing is missing. However, it's a fairly small change and it's thus easier to backport - cleanups can come later. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- include/net/cfg80211.h | 11 +++++++++++ net/mac80211/mlme.c | 21 ++++++++++++--------- net/wireless/core.h | 1 + net/wireless/mlme.c | 12 ++++++++++++ net/wireless/sme.c | 14 ++++++++++++++ 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 14b51d739c3b..66167138120a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4583,6 +4583,17 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, */ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss); +/** + * cfg80211_abandon_assoc - notify cfg80211 of abandoned association attempt + * @dev: network device + * @bss: The BSS entry with which association was abandoned. + * + * Call this whenever - for reasons reported through other API, like deauth RX, + * an association attempt was abandoned. + * This function may sleep. The caller must hold the corresponding wdev's mutex. + */ +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss); + /** * cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame * @dev: network device diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7486f2dab4ba..1118c61f835d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2510,7 +2510,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, } static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, - bool assoc) + bool assoc, bool abandon) { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; @@ -2533,6 +2533,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); mutex_unlock(&sdata->local->mtx); + + if (abandon) + cfg80211_abandon_assoc(sdata->dev, assoc_data->bss); } kfree(assoc_data); @@ -2762,7 +2765,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, bssid, reason_code, ieee80211_get_reason_code_string(reason_code)); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; @@ -3167,14 +3170,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); event.u.mlme.status = MLME_DENIED; event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); } else { if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); return; } @@ -3187,7 +3190,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * recalc after assoc_data is NULL but before associated * is set can cause the interface to go idle */ - ieee80211_destroy_assoc_data(sdata, true); + ieee80211_destroy_assoc_data(sdata, true, false); /* get uapsd queues configuration */ uapsd_queues = 0; @@ -3886,7 +3889,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) .u.mlme.status = MLME_TIMEOUT, }; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); drv_event_callback(sdata->local, sdata, &event); } @@ -4025,7 +4028,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); if (ifmgd->assoc_data) - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, @@ -4907,7 +4910,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code); @@ -4982,7 +4985,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) sdata_lock(sdata); if (ifmgd->assoc_data) { struct cfg80211_bss *bss = ifmgd->assoc_data->bss; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); } if (ifmgd->auth_data) diff --git a/net/wireless/core.h b/net/wireless/core.h index f0c0c8a48c92..5f5867f90fed 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -410,6 +410,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev); void cfg80211_sme_deauth(struct wireless_dev *wdev); void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); +void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index cbb48e26a871..76775a2b421d 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -149,6 +149,18 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) } EXPORT_SYMBOL(cfg80211_assoc_timeout); +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + + cfg80211_sme_abandon_assoc(wdev); + + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wiphy, bss); +} +EXPORT_SYMBOL(cfg80211_abandon_assoc); + void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index a77db333927e..35cc1de85dcc 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -39,6 +39,7 @@ struct cfg80211_conn { CFG80211_CONN_ASSOCIATING, CFG80211_CONN_ASSOC_FAILED, CFG80211_CONN_DEAUTH, + CFG80211_CONN_ABANDON, CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; @@ -206,6 +207,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); + /* fall through */ + case CFG80211_CONN_ABANDON: /* free directly, disconnected event already sent */ cfg80211_sme_free(wdev); return 0; @@ -423,6 +426,17 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) schedule_work(&rdev->conn_work); } +void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_ABANDON; + schedule_work(&rdev->conn_work); +} + static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, const u8 *ies, size_t ies_len, const u8 **out_ies, size_t *out_ies_len) -- GitLab From 072ebf89ca2385028d85300af036aa1a7cf691df Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 15 Nov 2016 18:47:21 +0100 Subject: [PATCH 0096/1442] ath9k: fix ath9k_hw_gpio_get() to return 0 or 1 on success commit 91851cc7a939039bd401adb6ca3da4402bec1d0c upstream. Commit b2d70d4944c1 ("ath9k: make GPIO API to support both of WMAC and SOC") refactored ath9k_hw_gpio_get() to support both WMAC and SOC GPIOs, changing the return on success from 1 to BIT(gpio). This broke some callers like ath_is_rfkill_set(). This doesn't fix any known bug in mainline at the moment, but should be fixed anyway. Instead of fixing all callers, change ath9k_hw_gpio_get() back to only return 0 or 1. Fixes: b2d70d4944c1 ("ath9k: make GPIO API to support both of WMAC and SOC") Signed-off-by: Matthias Schiffer [kvalo@qca.qualcomm.com: mention that doesn't fix any known bug] Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 14b13f07cd1f..a35f78be8dec 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -2792,7 +2792,7 @@ u32 ath9k_hw_gpio_get(struct ath_hw *ah, u32 gpio) WARN_ON(1); } - return val; + return !!val; } EXPORT_SYMBOL(ath9k_hw_gpio_get); -- GitLab From de5d62f892c382aae83f454190db55e7be3536d6 Mon Sep 17 00:00:00 2001 From: "Vittorio Gambaletta (VittGam)" Date: Wed, 9 Nov 2016 03:40:56 +0200 Subject: [PATCH 0097/1442] ath9k: Really fix LED polarity for some Mini PCI AR9220 MB92 cards. commit 79e57dd113d307a6c74773b8aaecf5442068988a upstream. The active_high LED of my Wistron DNMA-92 is still being recognized as active_low on 4.7.6 mainline. When I was preparing my former commit 0f9edcdd88a9 ("ath9k: Fix LED polarity for some Mini PCI AR9220 MB92 cards.") to fix that I must have somehow messed up with testing, because I tested the final version of that patch before sending it, and it was apparently working; but now it is not working on 4.7.6 mainline. I initially added the PCI_DEVICE_SUB section for 0x0029/0x2096 above the PCI_VDEVICE section for 0x0029; but then I moved the former below the latter after seeing how 0x002A sections were sorted in the file. This turned out to be wrong: if a generic PCI_VDEVICE entry (that has both subvendor and subdevice IDs set to PCI_ANY_ID) is put before a more specific one (PCI_DEVICE_SUB), then the generic PCI_VDEVICE entry will match first and will be used. With this patch, 0x0029/0x2096 has finally got active_high LED on 4.7.6. While I'm at it, let's fix 0x002A too by also moving its generic definition below its specific ones. Fixes: 0f9edcdd88a9 ("ath9k: Fix LED polarity for some Mini PCI AR9220 MB92 cards.") Signed-off-by: Vittorio Gambaletta [kvalo@qca.qualcomm.com: improve the commit log based on email discussions] Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c index 0dd454acf22a..aff473dfa10d 100644 --- a/drivers/net/wireless/ath/ath9k/pci.c +++ b/drivers/net/wireless/ath/ath9k/pci.c @@ -26,7 +26,6 @@ static const struct pci_device_id ath_pci_id_table[] = { { PCI_VDEVICE(ATHEROS, 0x0023) }, /* PCI */ { PCI_VDEVICE(ATHEROS, 0x0024) }, /* PCI-E */ { PCI_VDEVICE(ATHEROS, 0x0027) }, /* PCI */ - { PCI_VDEVICE(ATHEROS, 0x0029) }, /* PCI */ #ifdef CONFIG_ATH9K_PCOEM /* Mini PCI AR9220 MB92 cards: Compex WLM200NX, Wistron DNMA-92 */ @@ -37,7 +36,7 @@ static const struct pci_device_id ath_pci_id_table[] = { .driver_data = ATH9K_PCI_LED_ACT_HI }, #endif - { PCI_VDEVICE(ATHEROS, 0x002A) }, /* PCI-E */ + { PCI_VDEVICE(ATHEROS, 0x0029) }, /* PCI */ #ifdef CONFIG_ATH9K_PCOEM { PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS, @@ -85,7 +84,11 @@ static const struct pci_device_id ath_pci_id_table[] = { 0x10CF, /* Fujitsu */ 0x1536), .driver_data = ATH9K_PCI_D3_L1_WAR }, +#endif + { PCI_VDEVICE(ATHEROS, 0x002A) }, /* PCI-E */ + +#ifdef CONFIG_ATH9K_PCOEM /* AR9285 card for Asus */ { PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS, 0x002B, -- GitLab From 71717a3ef900dafe6e9854320491de5b8e972b08 Mon Sep 17 00:00:00 2001 From: Tobias Klausmann Date: Tue, 13 Dec 2016 18:08:07 +0100 Subject: [PATCH 0098/1442] ath9k: do not return early to fix rcu unlocking commit d1f1c0e289e1bc46cd6873ba6dd6c627f459e7fa upstream. Starting with commit d94a461d7a7d ("ath9k: use ieee80211_tx_status_noskb where possible") the driver uses rcu_read_lock() && rcu_read_unlock(), yet on returning early in ath_tx_edma_tasklet() the unlock is missing leading to stalls and suspicious RCU usage: =============================== [ INFO: suspicious RCU usage. ] 4.9.0-rc8 #11 Not tainted ------------------------------- kernel/rcu/tree.c:705 Illegal idle entry in RCU read-side critical section.! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 1 lock held by swapper/7/0: #0: ( rcu_read_lock ){......} , at: [] ath_tx_edma_tasklet+0x0/0x450 [ath9k] stack backtrace: CPU: 7 PID: 0 Comm: swapper/7 Not tainted 4.9.0-rc8 #11 Hardware name: Acer Aspire V3-571G/VA50_HC_CR, BIOS V2.21 12/16/2013 ffff88025efc3f38 ffffffff8132b1e5 ffff88017ede4540 0000000000000001 ffff88025efc3f68 ffffffff810a25f7 ffff88025efcee60 ffff88017edebdd8 ffff88025eeb5400 0000000000000091 ffff88025efc3f88 ffffffff810c3cd4 Call Trace: [] dump_stack+0x68/0x93 [] lockdep_rcu_suspicious+0xd7/0x110 [] rcu_eqs_enter_common.constprop.85+0x154/0x200 [] rcu_irq_exit+0x44/0xa0 [] irq_exit+0x61/0xd0 [] do_IRQ+0x65/0x110 [] common_interrupt+0x89/0x89 [] ? cpuidle_enter_state+0x151/0x200 [] cpuidle_enter+0x12/0x20 [] call_cpuidle+0x1e/0x40 [] cpu_startup_entry+0x146/0x220 [] start_secondary+0x148/0x170 Signed-off-by: Tobias Klausmann Fixes: d94a461d7a7d ("ath9k: use ieee80211_tx_status_noskb where possible") Acked-by: Felix Fietkau Acked-by: Paul E. McKenney Tested-by: Gabriel Craciunescu Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/xmit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 52bfbb988611..e47286bf378e 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2787,7 +2787,7 @@ void ath_tx_edma_tasklet(struct ath_softc *sc) fifo_list = &txq->txq_fifo[txq->txq_tailidx]; if (list_empty(fifo_list)) { ath_txq_unlock(sc, txq); - return; + break; } bf = list_first_entry(fifo_list, struct ath_buf, list); -- GitLab From 5bb425033ef1783eec0a40d3846dca5a5e2c03a8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 2 Dec 2016 15:14:19 +0200 Subject: [PATCH 0099/1442] Revert "mmc: sdhci: Reset cmd and data circuits after tuning failure" commit 2ca71c27eeaeddae38efe24a84b20e22708a3d1d upstream. This reverts commit fe5fb2e3b58f ("mmc: sdhci: Reset cmd and data circuits after tuning failure"). A better fix is available, and it will be applied to older stable releases, so get this out of the way by reverting it. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 42ef3ebb1d8c..7f80d185a5cc 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2086,10 +2086,6 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) if (!host->tuning_done) { pr_info(DRIVER_NAME ": Timeout waiting for Buffer Read Ready interrupt during tuning procedure, falling back to fixed sampling clock\n"); - - sdhci_do_reset(host, SDHCI_RESET_CMD); - sdhci_do_reset(host, SDHCI_RESET_DATA); - ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); ctrl &= ~SDHCI_CTRL_TUNED_CLK; ctrl &= ~SDHCI_CTRL_EXEC_TUNING; -- GitLab From cee935845420e2ffe81293d396ad44a5630ba743 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 2 Dec 2016 15:14:20 +0200 Subject: [PATCH 0100/1442] mmc: sdhci: Fix recovery from tuning timeout commit 61e53bd0047d58caee0c7170613045bf96de4458 upstream. Clearing the tuning bits should reset the tuning circuit. However there is more to do. Reset the command and data lines for good measure, and then for eMMC ensure the card is not still trying to process a tuning command by sending a stop command. Note the JEDEC eMMC specification says the stop command (CMD12) can be used to stop a tuning command (CMD21) whereas the SD specification is silent on the subject with respect to the SD tuning command (CMD19). Considering that CMD12 is not a valid SDIO command, the stop command is sent only when the tuning command is CMD21 i.e. for eMMC. That addresses cases seen so far which have been on eMMC. Note that this replaces the commit fe5fb2e3b58f ("mmc: sdhci: Reset cmd and data circuits after tuning failure") which is being reverted for v4.9+. Signed-off-by: Adrian Hunter Tested-by: Dan O'Donovan Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 7f80d185a5cc..e1e274a0a34f 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2091,7 +2091,27 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) ctrl &= ~SDHCI_CTRL_EXEC_TUNING; sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + sdhci_do_reset(host, SDHCI_RESET_CMD); + sdhci_do_reset(host, SDHCI_RESET_DATA); + err = -EIO; + + if (cmd.opcode != MMC_SEND_TUNING_BLOCK_HS200) + goto out; + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); + + spin_unlock_irqrestore(&host->lock, flags); + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = MMC_STOP_TRANSMISSION; + cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; + cmd.busy_timeout = 50; + mmc_wait_for_cmd(mmc, &cmd, 0); + + spin_lock_irqsave(&host->lock, flags); + goto out; } -- GitLab From dbcde92b1b44011c3993ba7a2ee2469a9350ffd3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 12 Nov 2016 15:22:38 +0100 Subject: [PATCH 0101/1442] regulator: stw481x-vmmc: fix ages old enable error commit 295070e9aa015abb9b92cccfbb1e43954e938133 upstream. The regulator has never been properly enabled, it has been dormant all the time. It's strange that MMC was working at all, but it likely worked by the signals going through the levelshifter and reaching the card anyways. Fixes: 3615a34ea1a6 ("regulator: add STw481x VMMC driver") Signed-off-by: Linus Walleij Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/stw481x-vmmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/stw481x-vmmc.c b/drivers/regulator/stw481x-vmmc.c index 7d2ae3e9e942..342f5da79975 100644 --- a/drivers/regulator/stw481x-vmmc.c +++ b/drivers/regulator/stw481x-vmmc.c @@ -47,7 +47,8 @@ static struct regulator_desc vmmc_regulator = { .volt_table = stw481x_vmmc_voltages, .enable_time = 200, /* FIXME: look this up */ .enable_reg = STW_CONF1, - .enable_mask = STW_CONF1_PDN_VMMC, + .enable_mask = STW_CONF1_PDN_VMMC | STW_CONF1_MMC_LS_STATUS, + .enable_val = STW_CONF1_PDN_VMMC, .vsel_reg = STW_CONF1, .vsel_mask = STW_CONF1_VMMC_MASK, }; -- GitLab From efd16f76f4da8293e8ccd196a20dd2f52042227a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 11 Nov 2016 14:22:36 +0000 Subject: [PATCH 0102/1442] mmc: sd: Meet alignment requirements for raw_ssr DMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e85baa8868b016513c0f5738362402495b1a66a5 upstream. The mmc_read_ssr() function results in DMA to the raw_ssr member of struct mmc_card, which is not guaranteed to be cache line aligned & thus might not meet the requirements set out in Documentation/DMA-API.txt: Warnings: Memory coherency operates at a granularity called the cache line width. In order for memory mapped by this API to operate correctly, the mapped region must begin exactly on a cache line boundary and end exactly on one (to prevent two separately mapped regions from sharing a single cache line). Since the cache line size may not be known at compile time, the API will not enforce this requirement. Therefore, it is recommended that driver writers who don't take special care to determine the cache line size at run time only map virtual regions that begin and end on page boundaries (which are guaranteed also to be cache line boundaries). On some systems where DMA is non-coherent this can lead to us losing data that shares cache lines with the raw_ssr array. Fix this by kmalloc'ing a temporary buffer to perform DMA into. kmalloc will ensure the buffer is suitably aligned, allowing the DMA to be performed without any loss of data. Signed-off-by: Paul Burton Fixes: 5275a652d296 ("mmc: sd: Export SD Status via “ssr” device attribute") Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/sd.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 73c762a28dfe..f6f40a1673ae 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -223,6 +223,7 @@ static int mmc_decode_scr(struct mmc_card *card) static int mmc_read_ssr(struct mmc_card *card) { unsigned int au, es, et, eo; + u32 *raw_ssr; int i; if (!(card->csd.cmdclass & CCC_APP_SPEC)) { @@ -231,14 +232,21 @@ static int mmc_read_ssr(struct mmc_card *card) return 0; } - if (mmc_app_sd_status(card, card->raw_ssr)) { + raw_ssr = kmalloc(sizeof(card->raw_ssr), GFP_KERNEL); + if (!raw_ssr) + return -ENOMEM; + + if (mmc_app_sd_status(card, raw_ssr)) { pr_warn("%s: problem reading SD Status register\n", mmc_hostname(card->host)); + kfree(raw_ssr); return 0; } for (i = 0; i < 16; i++) - card->raw_ssr[i] = be32_to_cpu(card->raw_ssr[i]); + card->raw_ssr[i] = be32_to_cpu(raw_ssr[i]); + + kfree(raw_ssr); /* * UNSTUFF_BITS only works with four u32s so we have to offset the -- GitLab From ca22975afa144d5bb648dcff22f598d623493e9f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 8 Dec 2016 20:49:32 +0000 Subject: [PATCH 0103/1442] timekeeping_Force_unsigned_clocksource_to_nanoseconds_conversion commit 9c1645727b8fa90d07256fdfcc45bf831242a3ab upstream. The clocksource delta to nanoseconds conversion is using signed math, but the delta is unsigned. This makes the conversion space smaller than necessary and in case of a multiplication overflow the conversion can become negative. The conversion is done with scaled math: s64 nsec_delta = ((s64)clkdelta * clk->mult) >> clk->shift; Shifting a signed integer right obvioulsy preserves the sign, which has interesting consequences: - Time jumps backwards - __iter_div_u64_rem() which is used in one of the calling code pathes will take forever to piecewise calculate the seconds/nanoseconds part. This has been reported by several people with different scenarios: David observed that when stopping a VM with a debugger: "It was essentially the stopped by debugger case. I forget exactly why, but the guest was being explicitly stopped from outside, it wasn't just scheduling lag. I think it was something in the vicinity of 10 minutes stopped." When lifting the stop the machine went dead. The stopped by debugger case is not really interesting, but nevertheless it would be a good thing not to die completely. But this was also observed on a live system by Liav: "When the OS is too overloaded, delta will get a high enough value for the msb of the sum delta * tkr->mult + tkr->xtime_nsec to be set, and so after the shift the nsec variable will gain a value similar to 0xffffffffff000000." Unfortunately this has been reintroduced recently with commit 6bd58f09e1d8 ("time: Add cycles to nanoseconds translation"). It had been fixed a year ago already in commit 35a4933a8959 ("time: Avoid signed overflow in timekeeping_get_ns()"). Though it's not surprising that the issue has been reintroduced because the function itself and the whole call chain uses s64 for the result and the propagation of it. The change in this recent commit is subtle: s64 nsec; - nsec = (d * m + n) >> s: + nsec = d * m + n; + nsec >>= s; d being type of cycle_t adds another level of obfuscation. This wouldn't have happened if the previous change to unsigned computation would have made the 'nsec' variable u64 right away and a follow up patch had cleaned up the whole call chain. There have been patches submitted which basically did a revert of the above patch leaving everything else unchanged as signed. Back to square one. This spawned a admittedly pointless discussion about potential users which rely on the unsigned behaviour until someone pointed out that it had been fixed before. The changelogs of said patches added further confusion as they made finally false claims about the consequences for eventual users which expect signed results. Despite delta being cycle_t, aka. u64, it's very well possible to hand in a signed negative value and the signed computation will happily return the correct result. But nobody actually sat down and analyzed the code which was added as user after the propably unintended signed conversion. Though in sensitive code like this it's better to analyze it proper and make sure that nothing relies on this than hunting the subtle wreckage half a year later. After analyzing all call chains it stands that no caller can hand in a negative value (which actually would work due to the s64 cast) and rely on the signed math to do the right thing. Change the conversion function to unsigned math. The conversion of all call chains is done in a follow up patch. This solves the starvation issue, which was caused by the negative result, but it does not solve the underlying problem. It merily procrastinates it. When the timekeeper update is deferred long enough that the unsigned multiplication overflows, then time going backwards is observable again. It does neither solve the issue of clocksources with a small counter width which will wrap around possibly several times and cause random time stamps to be generated. But those are usually not found on systems used for virtualization, so this is likely a non issue. I took the liberty to claim authorship for this simply because analyzing all callsites and writing the changelog took substantially more time than just making the simple s/s64/u64/ change and ignore the rest. Fixes: 6bd58f09e1d8 ("time: Add cycles to nanoseconds translation") Reported-by: David Gibson Reported-by: Liav Rehana Signed-off-by: Thomas Gleixner Reviewed-by: David Gibson Acked-by: Peter Zijlstra (Intel) Cc: Parit Bhargava Cc: Laurent Vivier Cc: "Christopher S. Hall" Cc: Chris Metcalf Cc: Richard Cochran Cc: John Stultz Link: http://lkml.kernel.org/r/20161208204228.688545601@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/timekeeping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 37dec7e3db43..46e312e9be38 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -299,10 +299,10 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset; static inline u32 arch_gettimeoffset(void) { return 0; } #endif -static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr, +static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, cycle_t delta) { - s64 nsec; + u64 nsec; nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; -- GitLab From acd0993f8e97968b97291060ec553e4550f816fc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 23 Nov 2016 23:21:17 +0100 Subject: [PATCH 0104/1442] gpio: stmpe: fix interrupt handling bug commit 1516c6350aa2770b8a5e36d40c3ec5078f92ba70 upstream. commit 43db289d00c6 ("gpio: stmpe: Rework registers access") reworked the STMPE register access so as to use [STMPE_IDX_*_LSB + i] to access the 8bit register for a certain bank, assuming the CSB and MSB will follow after the enumerator. For this to work the index needs to go from (size-1) to 0 not 0 to (size-1). However for the GPIO IRQ handler, the status registers we read register MSB + 3 bytes ahead for the 24 bit GPIOs and index registers from MSB upwards and run an index i over the registers UNLESS we are STMPE1600. This is not working when we get to clearing the interrupt EDGE status register STMPE_IDX_GPEDR_[LCM]SB: it is indexed like all other registers [STMPE_IDX_*_LSB + i] but in this loop we index from 0 to get the right bank index for the calculations, and we need to just add i to the MSB. Before this, interrupts on the STMPE2401 were broken, this patch fixes it so it works again. Cc: Patrice Chotard Fixes: 43db289d00c6 ("gpio: stmpe: Rework registers access") Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-stmpe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 5b0042776ec7..adba614b3965 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -413,7 +413,7 @@ static irqreturn_t stmpe_gpio_irq(int irq, void *dev) stmpe->partnum != STMPE1801) { stmpe_reg_write(stmpe, statmsbreg + i, status[i]); stmpe_reg_write(stmpe, - stmpe->regs[STMPE_IDX_GPEDR_LSB + i], + stmpe->regs[STMPE_IDX_GPEDR_MSB] + i, status[i]); } } -- GitLab From 5f654078b760104019e851247ae84d5498ff8a0e Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 30 Nov 2016 13:05:21 +0100 Subject: [PATCH 0105/1442] gpio: chardev: Return error for seek operations commit f4e81c529767b9a33d1b27695c54dc84a14af30d upstream. The GPIO chardev is used for management tasks (allocating line and event handles) and does neither support read() nor write() operations. Hence it does not make much sense to allow seek operations. Currently the chardev uses noop_llseek() for its seek implementation. This function does not move the pointer and simply returns the current position (always 0 for the GPIO chardev). noop_llseek() is primarily meant for devices that can not support seek, but where there might be a user that depends on the seek() operation succeeding. For newly added devices that can not support seek operations it is recommended to use no_llseek(), which will return an error. For more information see commit 6038f373a3dc ("llseek: automatically add .llseek fop"). Unfortunately this was overlooked when the GPIO chardev ABI was introduced. But it is highly unlikely that since then userspace applications have appeared that rely on being able to perform non-failing seek operations on a GPIO chardev file descriptor. So it should be safe to change from noop_llseel() to no_seek(). Also use nonseekable_open() in the chardev open() callback to clear the FMODE_SEEK, FMODE_PREAD and FMODE_PWRITE flags from the file. Neither of these should be set on a file that does not support seek operations. Fixes: 3c702e9987e2 ("gpio: add a userspace chardev ABI for GPIOs") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 868128a676ba..90621fb93941 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -986,7 +986,8 @@ static int gpio_chrdev_open(struct inode *inode, struct file *filp) return -ENODEV; get_device(&gdev->dev); filp->private_data = gdev; - return 0; + + return nonseekable_open(inode, filp); } /** @@ -1011,7 +1012,7 @@ static const struct file_operations gpio_fileops = { .release = gpio_chrdev_release, .open = gpio_chrdev_open, .owner = THIS_MODULE, - .llseek = noop_llseek, + .llseek = no_llseek, .unlocked_ioctl = gpio_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = gpio_ioctl_compat, -- GitLab From b6d666315b999df88a5fa5da65dda992dbe4556b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 2 Dec 2016 20:57:55 +0100 Subject: [PATCH 0106/1442] arm64: tegra: Add VDD_GPU regulator to Jetson TX1 commit 5e6b9a89afceadb1ee45472098f7d20af260335c upstream. Add the VDD_GPU regulator (a GPIO-enabled PWM regulator) to the Jetson TX1 board. This addition allows the GPU to be used provided the bootloader properly enabled the GPU node. Signed-off-by: Alexandre Courbot Signed-off-by: Thierry Reding [as pointed out by Thierry on IRC, nobody has reported a bug in the field, but using a new bootloader with a .dtb that has the incorrect data, it will crash on boot] Fixes: 336f79c7b6d7 ("arm64: tegra: Add NVIDIA Jetson TX1 Developer Kit support") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi index 5fda583351d7..906fb836d241 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi @@ -21,6 +21,10 @@ reg = <0x0 0x80000000 0x1 0x0>; }; + gpu@57000000 { + vdd-supply = <&vdd_gpu>; + }; + /* debug port */ serial@70006000 { status = "okay"; @@ -291,4 +295,18 @@ clock-frequency = <32768>; }; }; + + regulators { + vdd_gpu: regulator@100 { + compatible = "pwm-regulator"; + reg = <100>; + pwms = <&pwm 1 4880>; + regulator-name = "VDD_GPU"; + regulator-min-microvolt = <710000>; + regulator-max-microvolt = <1320000>; + enable-gpios = <&pmic 6 GPIO_ACTIVE_HIGH>; + regulator-ramp-delay = <80>; + regulator-enable-ramp-delay = <1000>; + }; + }; }; -- GitLab From 05bc2071b11b0c422d36fae33e701cf297563f04 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 1 Dec 2016 20:27:21 +0100 Subject: [PATCH 0107/1442] clk: bcm2835: Avoid overwriting the div info when disabling a pll_div clk commit 68af4fa8f39b542a6cde7ac19518d88e9b3099dc upstream. bcm2835_pll_divider_off() is resetting the divider field in the A2W reg to zero when disabling the clock. Make sure we preserve this value by reading the previous a2w_reg value first and ORing the result with A2W_PLL_CHANNEL_DISABLE. Signed-off-by: Boris Brezillon Fixes: 41691b8862e2 ("clk: bcm2835: Add support for programming the audio domain clocks") Reviewed-by: Eric Anholt Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/bcm/clk-bcm2835.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 8c7763fd9efc..3bbd2a58db47 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -751,7 +751,9 @@ static void bcm2835_pll_divider_off(struct clk_hw *hw) cprman_write(cprman, data->cm_reg, (cprman_read(cprman, data->cm_reg) & ~data->load_mask) | data->hold_mask); - cprman_write(cprman, data->a2w_reg, A2W_PLL_CHANNEL_DISABLE); + cprman_write(cprman, data->a2w_reg, + cprman_read(cprman, data->a2w_reg) | + A2W_PLL_CHANNEL_DISABLE); spin_unlock(&cprman->regs_lock); } -- GitLab From fab303ba78ee2bae3da057f750139fc6222c66d5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 22 Nov 2016 19:22:44 +0200 Subject: [PATCH 0108/1442] thermal: hwmon: Properly report critical temperature in sysfs commit f37fabb8643eaf8e3b613333a72f683770c85eca upstream. In the critical sysfs entry the thermal hwmon was returning wrong temperature to the user-space. It was reporting the temperature of the first trip point instead of the temperature of critical trip point. For example: /sys/class/hwmon/hwmon0/temp1_crit:50000 /sys/class/thermal/thermal_zone0/trip_point_0_temp:50000 /sys/class/thermal/thermal_zone0/trip_point_0_type:active /sys/class/thermal/thermal_zone0/trip_point_3_temp:120000 /sys/class/thermal/thermal_zone0/trip_point_3_type:critical Since commit e68b16abd91d ("thermal: add hwmon sysfs I/F") the driver have been registering a sysfs entry if get_crit_temp() callback was provided. However when accessed, it was calling get_trip_temp() instead of the get_crit_temp(). Fixes: e68b16abd91d ("thermal: add hwmon sysfs I/F") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Zhang Rui Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index c41c7742903a..2dcd4194d103 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -98,7 +98,7 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) int temperature; int ret; - ret = tz->ops->get_trip_temp(tz, 0, &temperature); + ret = tz->ops->get_crit_temp(tz, &temperature); if (ret) return ret; -- GitLab From 5bd6ccd9c95fdf068e125e08992c7424eb2ceabb Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Sun, 18 Dec 2016 13:11:46 +0300 Subject: [PATCH 0109/1442] docs: sphinx-extensions: make rstFlatTable work with docutils 0.13 commit 217e2bfab22e740227df09f22165e834cddd8a3b upstream. In docutils 0.13, the return type of get_column_widths method of the Table directive has changed [1], which breaks our flat-table directive and leads to a TypeError when trying to build the docs [2]. This patch adds support for the new return type, while keeping support for older docutils versions too. [1] https://sourceforge.net/p/docutils/patches/120/ [2] https://sourceforge.net/p/docutils/bugs/303/ Signed-off-by: Dmitry Shachnev Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/sphinx/rstFlatTable.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py index 55f275793028..25feb0d35e7a 100755 --- a/Documentation/sphinx/rstFlatTable.py +++ b/Documentation/sphinx/rstFlatTable.py @@ -157,6 +157,11 @@ class ListTableBuilder(object): def buildTableNode(self): colwidths = self.directive.get_column_widths(self.max_cols) + if isinstance(colwidths, tuple): + # Since docutils 0.13, get_column_widths returns a (widths, + # colwidths) tuple, where widths is a string (i.e. 'auto'). + # See https://sourceforge.net/p/docutils/patches/120/. + colwidths = colwidths[1] stub_columns = self.directive.options.get('stub-columns', 0) header_rows = self.directive.options.get('header-rows', 0) -- GitLab From bf6a9b31e8a9cbdb6f67f84882fcf7d252e43d6f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 3 Dec 2016 12:34:32 -0800 Subject: [PATCH 0110/1442] hv: acquire vmbus_connection.channel_mutex in vmbus_free_channels() commit abd1026da4a7700a8db370947f75cd17b6ae6f76 upstream. "kernel BUG at drivers/hv/channel_mgmt.c:350!" is observed when hv_vmbus module is unloaded. BUG_ON() was introduced in commit 85d9aa705184 ("Drivers: hv: vmbus: add an API vmbus_hvsock_device_unregister()") as vmbus_free_channels() codepath was apparently forgotten. Fixes: 85d9aa705184 ("Drivers: hv: vmbus: add an API vmbus_hvsock_device_unregister()") Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 96a85cd39580..1bc1d4795243 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -389,6 +389,7 @@ void vmbus_free_channels(void) { struct vmbus_channel *channel, *tmp; + mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list, listentry) { /* hv_process_channel_removal() needs this */ @@ -396,6 +397,7 @@ void vmbus_free_channels(void) vmbus_device_unregister(channel->device_obj); } + mutex_unlock(&vmbus_connection.channel_mutex); } /* -- GitLab From 38b2dc0b0f380984cd4fcc208497f8366f85f856 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Tue, 6 Dec 2016 22:53:48 -0500 Subject: [PATCH 0111/1442] staging/lustre/osc: Revert erroneous list_for_each_entry_safe use commit cd15dd6ef4ea11df87f717b8b1b83aaa738ec8af upstream. I have been having a lot of unexplainable crashes in osc_lru_shrink lately that I could not see a good explanation for and then I found this patch that slip under the radar somehow that incorrectly converted while loop for lru list iteration into list_for_each_entry_safe totally ignoring that in the body of the loop we drop spinlocks guarding this list and move list entries around. Not sure why it was not showing up right away, perhaps some of the more recent LRU changes committed caused some extra pressure on this code that finally highlighted the breakage. Reverts: 8adddc36b1fc ("staging: lustre: osc: Use list_for_each_entry_safe") CC: Bhaktipriya Shridhar Signed-off-by: Oleg Drokin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/osc/osc_page.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index 2a7a70aa9e80..9168451d2f29 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -542,7 +542,6 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, struct cl_object *clobj = NULL; struct cl_page **pvec; struct osc_page *opg; - struct osc_page *temp; int maxscan = 0; long count = 0; int index = 0; @@ -569,13 +568,15 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, spin_lock(&cli->cl_lru_list_lock); maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list)); - list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) { + while (!list_empty(&cli->cl_lru_list)) { struct cl_page *page; bool will_free = false; if (--maxscan < 0) break; + opg = list_entry(cli->cl_lru_list.next, struct osc_page, + ops_lru); page = opg->ops_cl.cpl_page; if (lru_page_busy(cli, page)) { list_move_tail(&opg->ops_lru, &cli->cl_lru_list); -- GitLab From 3b2f287b7795aa08cab1c61d99a4a615b63076a7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 16:21:20 +0100 Subject: [PATCH 0112/1442] staging: lustre: ldlm: pl_recalc time handling is wrong commit b8cb86fd95bb461c3496e1f4b4083b198c963a9c upstream. James Simmons reports: > The ldlm_pool field pl_recalc_time is set to the current > monotonic clock value but the interval period is calculated > with the wall clock. This means the interval period will > always be far larger than the pl_recalc_period, which is > just a small interval time period. The correct thing to > do is to use monotomic clock current value instead of the > wall clocks value when calculating recalc_interval_sec. This broke when I converted the 32-bit get_seconds() into ktime_get_{real_,}seconds() inconsistently. Either one of those two would have worked, but mixing them does not. Staying with the original intention of the patch, this changes the ktime_get_seconds() calls into ktime_get_real_seconds(), using real time instead of mononic time. Fixes: 8f83409cf238 ("staging/lustre: use 64-bit time for pl_recalc") Reported-by: James Simmons Signed-off-by: Arnd Bergmann Reviewed-by: James Simmons Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lustre/ldlm/ldlm_pool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index 9a1136e32dfc..34efb499c00b 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -356,10 +356,10 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl) u32 recalc_interval_sec; int count; - recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { spin_lock(&pl->pl_lock); - recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; + recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* @@ -382,7 +382,7 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl) count); } - recalc_interval_sec = pl->pl_recalc_time - ktime_get_seconds() + + recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() + pl->pl_recalc_period; if (recalc_interval_sec <= 0) { /* DEBUG: should be re-removed after LU-4536 is fixed */ @@ -657,7 +657,7 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, spin_lock_init(&pl->pl_lock); atomic_set(&pl->pl_granted, 0); - pl->pl_recalc_time = ktime_get_seconds(); + pl->pl_recalc_time = ktime_get_real_seconds(); atomic_set(&pl->pl_lock_volume_factor, 1); atomic_set(&pl->pl_grant_rate, 0); -- GitLab From bd1692bed615fd31cd04ae19369130060fa4ec04 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 14 Nov 2016 20:16:21 +0000 Subject: [PATCH 0113/1442] staging: comedi: ni_mio_common: fix M Series ni_ai_insn_read() data mask commit 655c4d442d1213b617926cc6d54e2a9a793fb46b upstream. For NI M Series cards, the Comedi `insn_read` handler for the AI subdevice is broken due to ANDing the value read from the AI FIFO data register with an incorrect mask. The incorrect mask clears all but the most significant bit of the sample data. It should preserve all the sample data bits. Correct it. Fixes: 817144ae7fda ("staging: comedi: ni_mio_common: remove unnecessary use of 'board->adbits'") Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 0f97d7b611d7..24da77b7faee 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -1832,7 +1832,7 @@ static int ni_ai_insn_read(struct comedi_device *dev, unsigned int *data) { struct ni_private *devpriv = dev->private; - unsigned int mask = (s->maxdata + 1) >> 1; + unsigned int mask = s->maxdata; int i, n; unsigned int signbits; unsigned int d; -- GitLab From 745f7d0d1951e1109ad159860732c3ef157aba17 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 14 Nov 2016 20:16:22 +0000 Subject: [PATCH 0114/1442] staging: comedi: ni_mio_common: fix E series ni_ai_insn_read() data commit 857a661020a2de3a0304edf33ad656abee100891 upstream. Commit 0557344e2149 ("staging: comedi: ni_mio_common: fix local var for 32-bit read") changed the type of local variable `d` from `unsigned short` to `unsigned int` to fix a bug introduced in commit 9c340ac934db ("staging: comedi: ni_stc.h: add read/write callbacks to struct ni_private") when reading AI data for NI PCI-6110 and PCI-6111 cards. Unfortunately, other parts of the function rely on the variable being `unsigned short` when an offset value in local variable `signbits` is added to `d` before writing the value to the `data` array: d += signbits; data[n] = d; The `signbits` variable will be non-zero in bipolar mode, and is used to convert the hardware's 2's complement, 16-bit numbers to Comedi's straight binary sample format (with 0 representing the most negative voltage). This breaks because `d` is now 32 bits wide instead of 16 bits wide, so after the addition of `signbits`, `data[n]` ends up being set to values above 65536 for negative voltages. This affects all supported "E series" cards except PCI-6143 (and PXI-6143). Fix it by ANDing the value written to the `data[n]` with the mask 0xffff. Fixes: 0557344e2149 ("staging: comedi: ni_mio_common: fix local var for 32-bit read") Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 24da77b7faee..1c967c30e4ce 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -1875,7 +1875,7 @@ static int ni_ai_insn_read(struct comedi_device *dev, return -ETIME; } d += signbits; - data[n] = d; + data[n] = d & 0xffff; } } else if (devpriv->is_6143) { for (n = 0; n < insn->n; n++) { @@ -1924,9 +1924,8 @@ static int ni_ai_insn_read(struct comedi_device *dev, data[n] = dl; } else { d = ni_readw(dev, NI_E_AI_FIFO_DATA_REG); - /* subtle: needs to be short addition */ d += signbits; - data[n] = d; + data[n] = d & 0xffff; } } } -- GitLab From 6d46601e656412d351ceda73226419a9bf5cb2d7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 29 Nov 2016 15:32:15 +0100 Subject: [PATCH 0115/1442] ACPI / video: Add force_native quirk for Dell XPS 17 L702X commit 350fa038c31b056fc509624efb66348ac2c1e3d0 upstream. The Dell XPS 17 L702X has a non-working acpi_video0 backlight interface and an intel_backlight interface which works fine. Add a force_native quirk for it so that the non-working acpi_video0 interface does not get registered. Note that there also is an issue with the brightnesskeys on this laptop, they do not generate key-press events in anyway. That is not solved by this patch. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1123661 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index a6b36fc53aec..7f48156cbc0c 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -296,6 +296,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vostro V131"), }, }, + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1123661 */ + .callback = video_detect_force_native, + .ident = "Dell XPS 17 L702X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell System XPS L702X"), + }, + }, { }, }; -- GitLab From a04465251f948a345b7642bf292c063dc482146a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 29 Nov 2016 15:32:16 +0100 Subject: [PATCH 0116/1442] ACPI / video: Add force_native quirk for HP Pavilion dv6 commit 6276e53fa8c06a3a5cf7b95b77b079966de9ad66 upstream. The HP Pavilion dv6 has a non-working acpi_video0 backlight interface and an intel_backlight interface which works fine. Add a force_native quirk for it so that the non-working acpi_video0 interface does not get registered. Note that there are quite a few HP Pavilion dv6 variants, some woth ATI and some with NVIDIA hybrid gfx, both seem to need this quirk to have working backlight control. There are also some versions with only Intel integrated gfx, these may not need this quirk, but it should not hurt there. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1204476 Link: https://bugs.launchpad.net/ubuntu/+source/linux-lts-trusty/+bug/1416940 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 7f48156cbc0c..02ded25c82e4 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -305,6 +305,17 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell System XPS L702X"), }, }, + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1204476 */ + /* https://bugs.launchpad.net/ubuntu/+source/linux-lts-trusty/+bug/1416940 */ + .callback = video_detect_force_native, + .ident = "HP Pavilion dv6", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv6 Notebook PC"), + }, + }, + { }, }; -- GitLab From f3dd47e0f683cfcca9e1d31f61d33aed37c5f122 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 5 Dec 2016 12:31:14 -0500 Subject: [PATCH 0117/1442] drm/amdgpu/si: load the proper firmware on 0x87 oland boards commit 5a23f2720589ec4757bc62183902d2518f02026e upstream. New variant. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index d6f85b1a0b93..961e5eb92a1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -7713,6 +7713,7 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev) (adev->pdev->revision == 0x80) || (adev->pdev->revision == 0x81) || (adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87) || (adev->pdev->device == 0x6604) || (adev->pdev->device == 0x6605)) chip_name = "oland_k"; -- GitLab From eb0d743b41882ad2987f4fe4000dce595fce1b7f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 5 Dec 2016 12:27:52 -0500 Subject: [PATCH 0118/1442] drm/amdgpu: add additional pci revision to dpm workaround commit ce66cb1e9cbf91fcb216de64a0fe65aa17f97bc1 upstream. New variant. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 961e5eb92a1e..6d2ea76f4eb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3504,6 +3504,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->revision == 0x80) || (adev->pdev->revision == 0x81) || (adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87) || (adev->pdev->device == 0x6604) || (adev->pdev->device == 0x6605)) { max_sclk = 75000; -- GitLab From 2cea2151623122f78b5287acba658208bb0ee4a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 27 Oct 2016 17:11:43 +0900 Subject: [PATCH 0119/1442] drm/amdgpu: Store CRTC relative amdgpu_crtc->cursor_x/y values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8e57ec613df7d6bfa8ffe7512290c5415ebb8657 upstream. We were storing viewport relative coordinates. However, crtc_cursor_set2 and cursor_reset pass amdgpu_crtc->cursor_x/y as the x/y parameters of cursor_move_locked, which would break if the CRTC isn't located at (0, 0). Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 9260caef74fa..e5af57318f34 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2577,6 +2577,9 @@ static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc, struct amdgpu_device *adev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + amdgpu_crtc->cursor_x = x; + amdgpu_crtc->cursor_y = y; + /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; @@ -2596,9 +2599,6 @@ static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 367739bd1927..5b2fa17a5589 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2593,6 +2593,9 @@ static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc, struct amdgpu_device *adev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + amdgpu_crtc->cursor_x = x; + amdgpu_crtc->cursor_y = y; + /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; @@ -2612,9 +2615,6 @@ static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 15f9fc0514b2..b2447db693c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1933,6 +1933,9 @@ static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, int w = amdgpu_crtc->cursor_width; + amdgpu_crtc->cursor_x = x; + amdgpu_crtc->cursor_y = y; + /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; @@ -1952,8 +1955,6 @@ static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(EVERGREEN_CUR_SIZE + amdgpu_crtc->crtc_offset, ((w - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 8c4d808db0f1..80e71d100d58 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2465,6 +2465,9 @@ static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc, struct amdgpu_device *adev = crtc->dev->dev_private; int xorigin = 0, yorigin = 0; + amdgpu_crtc->cursor_x = x; + amdgpu_crtc->cursor_y = y; + /* avivo cursor are offset into the total surface */ x += crtc->x; y += crtc->y; @@ -2484,9 +2487,6 @@ static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc, WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; - return 0; } -- GitLab From 8e1b86f30bc1e3d213d269a74b3375a06ba8199f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 27 Oct 2016 17:01:26 +0900 Subject: [PATCH 0120/1442] drm/amdgpu: Also call cursor_move_locked when the cursor size changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8b02cde994e3025b6886c82eac6cd1e7bc4d1fe9 upstream. The cursor size also affects the register programming. Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 9 +++++---- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index e5af57318f34..882404cefbc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2661,12 +2661,11 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - dce_v10_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2675,6 +2674,8 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v10_0_cursor_move_locked(crtc, x, y); + amdgpu_crtc->cursor_width = width; + amdgpu_crtc->cursor_height = height; amdgpu_crtc->cursor_hot_x = hot_x; amdgpu_crtc->cursor_hot_y = hot_y; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 5b2fa17a5589..7ddc32127d88 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2677,12 +2677,11 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - dce_v11_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2691,6 +2690,8 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v11_0_cursor_move_locked(crtc, x, y); + amdgpu_crtc->cursor_width = width; + amdgpu_crtc->cursor_height = height; amdgpu_crtc->cursor_hot_x = hot_x; amdgpu_crtc->cursor_hot_y = hot_y; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index b2447db693c0..fde6ee1f6f2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2017,12 +2017,11 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - dce_v6_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2031,6 +2030,8 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v6_0_cursor_move_locked(crtc, x, y); + amdgpu_crtc->cursor_width = width; + amdgpu_crtc->cursor_height = height; amdgpu_crtc->cursor_hot_x = hot_x; amdgpu_crtc->cursor_hot_y = hot_y; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 80e71d100d58..7d9ffde0a628 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2549,12 +2549,11 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - dce_v8_0_lock_cursor(crtc, true); - if (hot_x != amdgpu_crtc->cursor_hot_x || + if (width != amdgpu_crtc->cursor_width || + height != amdgpu_crtc->cursor_height || + hot_x != amdgpu_crtc->cursor_hot_x || hot_y != amdgpu_crtc->cursor_hot_y) { int x, y; @@ -2563,6 +2562,8 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, dce_v8_0_cursor_move_locked(crtc, x, y); + amdgpu_crtc->cursor_width = width; + amdgpu_crtc->cursor_height = height; amdgpu_crtc->cursor_hot_x = hot_x; amdgpu_crtc->cursor_hot_y = hot_y; } -- GitLab From c2a51dd3138df107432fc2ff452cf43ee49a7dff Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Fri, 25 Nov 2016 16:55:16 +0530 Subject: [PATCH 0121/1442] drm/amd/amdgpu: enable GUI idle INT after enabling CGCG commit dd31ae9ac933636c3712b7dd0f6152c1d71f81fe upstream. GUI idle interrupts should be enabled only after we have enabled coarse grain clock gating (CGCG). This prevents GFX engine generating idle interrupt even though CGCG is not completely enabled. Most of the time this goes un-noticed, but on some Stoney ASICs this results in GFX engine hang after system resumes from suspend. The issue is not particular to Stoney though and could have occured on any ASIC. The patch fixes this issue. Reviewed-by: Alex Deucher Reported-by: Sunil Uttarwar Signed-off-by: Arindam Nath Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bb97182dc749..2534b4989924 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5891,29 +5891,24 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev adev->gfx.rlc.funcs->enter_safe_mode(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { - /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ - * Cmp_busy/GFX_Idle interrupts - */ - gfx_v8_0_enable_gui_idle_interrupt(adev, true); - temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; if (temp1 != data1) WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); - /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ + /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); - /* 3 - clear cgcg override */ + /* 2 - clear cgcg override */ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); - /* 4 - write cmd to set CGLS */ + /* 3 - write cmd to set CGLS */ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); - /* 5 - enable cgcg */ + /* 4 - enable cgcg */ data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { @@ -5931,6 +5926,11 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); + + /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/ + * Cmp_busy/GFX_Idle interrupts + */ + gfx_v8_0_enable_gui_idle_interrupt(adev, true); } else { /* disable cntx_empty_int_enable & GFX Idle interrupt */ gfx_v8_0_enable_gui_idle_interrupt(adev, false); -- GitLab From 42e5fd6bce5bec03590aa4467c30f6338db0bbbc Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 4 Nov 2016 18:36:17 +0900 Subject: [PATCH 0122/1442] drm/nouveau/gr: fallback to legacy paths during firmware lookup commit e137040e0d0376b404fc5155eba44ea07126e3bd upstream. Look for firmware files using the legacy ("nouveau/nvxx_fucxxxx") path if they cannot be found in the new, "official" path. User setups were broken by the switch, which is bad. There are only 4 firmware files we may want to look up that way, so hardcode them into the lookup function. All new firmware files should use the standard "nvidia//gr/" path. Fixes: 8539b37acef7 ("drm/nouveau/gr: use NVIDIA-provided external firmwares") Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 50 +++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 157919c788e6..6584d505460c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1755,6 +1755,50 @@ gf100_gr_ = { .object_get = gf100_gr_object_get, }; +int +gf100_gr_ctor_fw_legacy(struct gf100_gr *gr, const char *fwname, + struct gf100_gr_fuc *fuc, int ret) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + const struct firmware *fw; + char f[32]; + + /* see if this firmware has a legacy path */ + if (!strcmp(fwname, "fecs_inst")) + fwname = "fuc409c"; + else if (!strcmp(fwname, "fecs_data")) + fwname = "fuc409d"; + else if (!strcmp(fwname, "gpccs_inst")) + fwname = "fuc41ac"; + else if (!strcmp(fwname, "gpccs_data")) + fwname = "fuc41ad"; + else { + /* nope, let's just return the error we got */ + nvkm_error(subdev, "failed to load %s\n", fwname); + return ret; + } + + /* yes, try to load from the legacy path */ + nvkm_debug(subdev, "%s: falling back to legacy path\n", fwname); + + snprintf(f, sizeof(f), "nouveau/nv%02x_%s", device->chipset, fwname); + ret = request_firmware(&fw, f, device->dev); + if (ret) { + snprintf(f, sizeof(f), "nouveau/%s", fwname); + ret = request_firmware(&fw, f, device->dev); + if (ret) { + nvkm_error(subdev, "failed to load %s\n", fwname); + return ret; + } + } + + fuc->size = fw->size; + fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL); + release_firmware(fw); + return (fuc->data != NULL) ? 0 : -ENOMEM; +} + int gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname, struct gf100_gr_fuc *fuc) @@ -1765,10 +1809,8 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname, int ret; ret = nvkm_firmware_get(device, fwname, &fw); - if (ret) { - nvkm_error(subdev, "failed to load %s\n", fwname); - return ret; - } + if (ret) + return gf100_gr_ctor_fw_legacy(gr, fwname, fuc, ret); fuc->size = fw->size; fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL); -- GitLab From 2c256b8e81bf7ce751531b679421df517e8e0ec5 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 19 Oct 2016 12:15:52 +1000 Subject: [PATCH 0123/1442] drm/nouveau/kms: lvds panel strap moved again on maxwell commit 768e847759d551c96e129e194588dbfb11a1d576 upstream. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_bios.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index a1570b109434..23ffe8571a99 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -333,6 +333,9 @@ get_fp_strap(struct drm_device *dev, struct nvbios *bios) if (bios->major_version < 5 && bios->data[0x48] & 0x4) return NVReadVgaCrtc5758(dev, 0, 0xf) & 0xf; + if (drm->device.info.family >= NV_DEVICE_INFO_V0_MAXWELL) + return nvif_rd32(device, 0x001800) & 0x0000000f; + else if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 24) & 0xf; else -- GitLab From d95ef8af5425f7ac99427b7d4fad5b4c8e54c879 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 14 Oct 2016 13:16:36 +1000 Subject: [PATCH 0124/1442] drm/nouveau/bios: require checksum to match for fast acpi shadow method commit 5dc7f4aa9d84ea94b54a9bfcef095f0289f1ebda upstream. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 7 +++++-- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h index 212800ecdce9..7d1d3c6b4b72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h @@ -12,6 +12,7 @@ struct nvbios_source { bool rw; bool ignore_checksum; bool no_pcir; + bool require_checksum; }; int nvbios_extend(struct nvkm_bios *, u32 length); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index b2557e87afdd..7deb81b6dbac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -86,9 +86,12 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd) nvbios_checksum(&bios->data[image.base], image.size)) { nvkm_debug(subdev, "%08x: checksum failed\n", image.base); - if (mthd->func->rw) + if (!mthd->func->require_checksum) { + if (mthd->func->rw) + score += 1; score += 1; - score += 1; + } else + return 0; } else { score += 3; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c index 8fecb5ff22a0..06572f8ce914 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c @@ -99,6 +99,7 @@ nvbios_acpi_fast = { .init = acpi_init, .read = acpi_read_fast, .rw = false, + .require_checksum = true, }; const struct nvbios_source -- GitLab From a5b4c77c92da629bf10926fe05cdf972e0e08f86 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 13 Dec 2016 09:29:55 +1000 Subject: [PATCH 0125/1442] drm/nouveau/ltc: protect clearing of comptags with mutex commit f4e65efc88b64c1dbca275d42a188edccedb56c6 upstream. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c index 39c2a38e54f7..0c7ef250dcaf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c @@ -47,8 +47,10 @@ nvkm_ltc_tags_clear(struct nvkm_ltc *ltc, u32 first, u32 count) BUG_ON((first > limit) || (limit >= ltc->num_tags)); + mutex_lock(<c->subdev.mutex); ltc->func->cbc_clear(ltc, first, limit); ltc->func->cbc_wait(ltc); + mutex_unlock(<c->subdev.mutex); } int -- GitLab From bfa563bae25131fc084c273b43a789819e302fe7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 12 Dec 2016 17:52:45 +1000 Subject: [PATCH 0126/1442] drm/nouveau/ttm: wait for bo fence to signal before unmapping vmas commit 10dcab3e7f477bffee88d518aad57d06777cfdf4 upstream. TTM was changed a while back to allow for pipelining of buffer moves, and part of this was the removal of waiting for a BO to idle before calling move(), placing the responsibility on the driver to do this if required. That's all well and good, except, we make use of move_notify() to handle mapping/unmapping from the GPU VMM as move() isn't called on all paths. This commit adds a wait before unmapping from a VMM in move_notify(), to prevent GPU page faults where a buffer is still being accessed. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_bo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 343b8659472c..a2e6a81669e7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1209,6 +1209,7 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem) nvbo->page_shift != vma->vm->mmu->lpg_shift)) { nvkm_vm_map(vma, new_mem->mm_node); } else { + WARN_ON(ttm_bo_wait(bo, false, false)); nvkm_vm_unmap(vma); } } -- GitLab From 64a5c4725fd385cb51c0b6b63b3f65d68d4aa2a8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 3 Nov 2016 16:37:33 +1000 Subject: [PATCH 0127/1442] drm/nouveau/i2c/gk110b,gm10x: use the correct implementation commit 5b3800a6b763874e4a23702fb9628d3bd3315ce9 upstream. DPAUX registers moved on Kepler, these chipsets were still using the Fermi implementation for some reason. This fixes detection of hotplug/sink IRQs on DP connectors. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 7218a067a6c5..e0d7f8472ac6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1851,7 +1851,7 @@ nvf1_chipset = { .fb = gk104_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gf119_i2c_new, + .i2c = gk104_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1965,7 +1965,7 @@ nv117_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gf119_i2c_new, + .i2c = gk104_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1999,7 +1999,7 @@ nv118_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gf119_i2c_new, + .i2c = gk104_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, -- GitLab From 9195157f7d03758655ea6940348152b2c563fa1a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 5 Nov 2016 14:33:14 +1000 Subject: [PATCH 0128/1442] drm/nouveau/fifo/gf100-: protect channel preempt with subdev mutex commit b27add13f500469127afdf011dbcc9c649e16e54 upstream. This avoids an issue that occurs when we're attempting to preempt multiple channels simultaneously. HW seems to ignore preempt requests while it's still processing a previous one, which, well, makes sense. Fixes random "fifo: SCHED_ERROR 0d []" + GPCCS page faults during parallel piglit runs on (at least) GM107. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c | 9 ++++++--- drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c | 8 +++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c index cbc67f262322..12d964260a29 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c @@ -60,6 +60,7 @@ gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, struct nvkm_gpuobj *inst = chan->base.inst; int ret = 0; + mutex_lock(&subdev->mutex); nvkm_wr32(device, 0x002634, chan->base.chid); if (nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x002634) == chan->base.chid) @@ -67,10 +68,12 @@ gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, ) < 0) { nvkm_error(subdev, "channel %d [%s] kick timeout\n", chan->base.chid, chan->base.object.client->name); - ret = -EBUSY; - if (suspend) - return ret; + ret = -ETIMEDOUT; } + mutex_unlock(&subdev->mutex); + + if (ret && suspend) + return ret; if (offset) { nvkm_kmap(inst); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index ed4351032ed6..a2df4f3e7763 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -40,7 +40,9 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; struct nvkm_client *client = chan->base.object.client; + int ret = 0; + mutex_lock(&subdev->mutex); nvkm_wr32(device, 0x002634, chan->base.chid); if (nvkm_msec(device, 2000, if (!(nvkm_rd32(device, 0x002634) & 0x00100000)) @@ -48,10 +50,10 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) ) < 0) { nvkm_error(subdev, "channel %d [%s] kick timeout\n", chan->base.chid, client->name); - return -EBUSY; + ret = -ETIMEDOUT; } - - return 0; + mutex_unlock(&subdev->mutex); + return ret; } static u32 -- GitLab From 45ec673dfe0f8b1564289067a35f4abde3bd8472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 27 Oct 2016 13:03:23 +0900 Subject: [PATCH 0129/1442] drm/radeon: Also call cursor_move_locked when the cursor size changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dcab0fa64e300afa18f39cd98d05e0950f652adf upstream. The cursor size also affects the register programming. Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_cursor.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 2a10e24b34b1..99a1c8e4c7be 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -297,12 +297,11 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - radeon_crtc->cursor_width = width; - radeon_crtc->cursor_height = height; - radeon_lock_cursor(crtc, true); - if (hot_x != radeon_crtc->cursor_hot_x || + if (width != radeon_crtc->cursor_width || + height != radeon_crtc->cursor_height || + hot_x != radeon_crtc->cursor_hot_x || hot_y != radeon_crtc->cursor_hot_y) { int x, y; @@ -311,6 +310,8 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_cursor_move_locked(crtc, x, y); + radeon_crtc->cursor_width = width; + radeon_crtc->cursor_height = height; radeon_crtc->cursor_hot_x = hot_x; radeon_crtc->cursor_hot_y = hot_y; } -- GitLab From 126f676b087d091e714fcae1ebe87182062d138b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 27 Oct 2016 14:54:31 +0900 Subject: [PATCH 0130/1442] drm/radeon: Hide the HW cursor while it's out of bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b16cf7785a4200b1bddf4f70c9dda2efc49e278 upstream. Fixes hangs in that case under some circumstances. v2: * Only use non-0 x/yorigin if the cursor is (partially) outside of the top/left edge of the total surface with AVIVO/DCE Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1000433 Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher (v1) Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_cursor.c | 60 ++++++++++++++++++-------- drivers/gpu/drm/radeon/radeon_mode.h | 1 + 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 99a1c8e4c7be..87a72476d313 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -90,6 +90,9 @@ static void radeon_show_cursor(struct drm_crtc *crtc) struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; + if (radeon_crtc->cursor_out_of_bounds) + return; + if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, upper_32_bits(radeon_crtc->cursor_addr)); @@ -148,16 +151,17 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) x += crtc->x; y += crtc->y; } - DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); - if (x < 0) { + if (x < 0) xorigin = min(-x, radeon_crtc->max_cursor_width - 1); - x = 0; - } - if (y < 0) { + if (y < 0) yorigin = min(-y, radeon_crtc->max_cursor_height - 1); - y = 0; + + if (!ASIC_IS_AVIVO(rdev)) { + x += crtc->x; + y += crtc->y; } + DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); /* fixed on DCE6 and newer */ if (ASIC_IS_AVIVO(rdev) && !ASIC_IS_DCE6(rdev)) { @@ -180,27 +184,31 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) if (i > 1) { int cursor_end, frame_end; - cursor_end = x - xorigin + w; + cursor_end = x + w; frame_end = crtc->x + crtc->mode.crtc_hdisplay; if (cursor_end >= frame_end) { w = w - (cursor_end - frame_end); if (!(frame_end & 0x7f)) w--; - } else { - if (!(cursor_end & 0x7f)) - w--; + } else if (cursor_end <= 0) { + goto out_of_bounds; + } else if (!(cursor_end & 0x7f)) { + w--; } if (w <= 0) { - w = 1; - cursor_end = x - xorigin + w; - if (!(cursor_end & 0x7f)) { - x--; - WARN_ON_ONCE(x < 0); - } + goto out_of_bounds; } } } + if (x <= (crtc->x - w) || y <= (crtc->y - radeon_crtc->cursor_height) || + x >= (crtc->x + crtc->mode.crtc_hdisplay) || + y >= (crtc->y + crtc->mode.crtc_vdisplay)) + goto out_of_bounds; + + x += xorigin; + y += yorigin; + if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y); WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); @@ -212,6 +220,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) WREG32(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset, ((w - 1) << 16) | (radeon_crtc->cursor_height - 1)); } else { + x -= crtc->x; + y -= crtc->y; + if (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN) y *= 2; @@ -232,6 +243,19 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) radeon_crtc->cursor_x = x; radeon_crtc->cursor_y = y; + if (radeon_crtc->cursor_out_of_bounds) { + radeon_crtc->cursor_out_of_bounds = false; + if (radeon_crtc->cursor_bo) + radeon_show_cursor(crtc); + } + + return 0; + + out_of_bounds: + if (!radeon_crtc->cursor_out_of_bounds) { + radeon_hide_cursor(crtc); + radeon_crtc->cursor_out_of_bounds = true; + } return 0; } @@ -308,12 +332,12 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, x = radeon_crtc->cursor_x + radeon_crtc->cursor_hot_x - hot_x; y = radeon_crtc->cursor_y + radeon_crtc->cursor_hot_y - hot_y; - radeon_cursor_move_locked(crtc, x, y); - radeon_crtc->cursor_width = width; radeon_crtc->cursor_height = height; radeon_crtc->cursor_hot_x = hot_x; radeon_crtc->cursor_hot_y = hot_y; + + radeon_cursor_move_locked(crtc, x, y); } radeon_show_cursor(crtc); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index bb75201a24ba..f1da484864a9 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -330,6 +330,7 @@ struct radeon_crtc { u16 lut_r[256], lut_g[256], lut_b[256]; bool enabled; bool can_tile; + bool cursor_out_of_bounds; uint32_t crtc_offset; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; -- GitLab From 851bedd86b95516ca3334b1eadf8b68eb220c5e9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Dec 2016 00:21:48 -0500 Subject: [PATCH 0131/1442] drm/radeon: add additional pci revision to dpm workaround commit 8729675c00a8d13cb2094d617d70a4a4da7d83c5 upstream. New variant. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index c49934527a87..8b5e697f2549 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3026,6 +3026,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->revision == 0x80) || (rdev->pdev->revision == 0x81) || (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || (rdev->pdev->device == 0x6604) || (rdev->pdev->device == 0x6605)) { max_sclk = 75000; -- GitLab From 6187f21f8c97f66a15e7c5392bd10fcf2cbc395a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Dec 2016 00:23:35 -0500 Subject: [PATCH 0132/1442] drm/radeon/si: load the proper firmware on 0x87 oland boards commit abb2e3c1ce64c8bba678973800c34ea1dc97c42c upstream. New variant. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index e402be8821c4..125c7e82c3d1 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -1714,6 +1714,7 @@ static int si_init_microcode(struct radeon_device *rdev) (rdev->pdev->revision == 0x80) || (rdev->pdev->revision == 0x81) || (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || (rdev->pdev->device == 0x6604) || (rdev->pdev->device == 0x6605)) new_smc = true; -- GitLab From aa286e88fd914aecfdf5c4807fb814233c0d092b Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Tue, 1 Nov 2016 15:43:15 +0100 Subject: [PATCH 0133/1442] drm/gma500: Add compat ioctl commit 0a97c81a9717431e6c57ea845b59c3c345edce67 upstream. Hook up drm_compat_ioctl to support 32-bit userspace on 64-bit kernels. It turns out that N2600 and N2800 comes with 64-bit enabled. We previously assumed there where no such systems out there. Signed-off-by: Patrik Jakobsson Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/20161101144315.2955-1-patrik.r.jakobsson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/psb_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index 50eb944fb78a..8f3ca526bd1b 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -473,6 +473,9 @@ static const struct file_operations psb_gem_fops = { .open = drm_open, .release = drm_release, .unlocked_ioctl = psb_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif .mmap = drm_gem_mmap, .poll = drm_poll, .read = drm_read, -- GitLab From 326760ba1802a5b2f73de06f3bac4e9752a3257a Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 1 Dec 2016 17:14:45 +0800 Subject: [PATCH 0134/1442] drm/amd/powerplay: bypass fan table setup if no fan connected commit 10e2ca346bf74561ff1b7fff6287716ab976cd8c upstream. If vBIOS noFan bit is set, the fan table parameters in thermal controller will not get initialized. The driver should avoid to use these uninitialized parameter to do calculation. Otherwise, it may trigger divide 0 error. Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c | 6 ++++++ drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c | 6 ++++++ drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c | 6 ++++++ drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c index 76310ac7ef0d..dca1b13fda2f 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c @@ -1958,6 +1958,12 @@ int fiji_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) int res; uint64_t tmp64; + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + if (smu_data->smu7_data.fan_table_start == 0) { phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c index 8c889caba420..6c26b83655d0 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c @@ -2006,6 +2006,12 @@ int iceland_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) return 0; + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + if (0 == smu7_data->fan_table_start) { phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); return 0; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c index 71bb2f8dc157..8ca1a3341dea 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c @@ -1885,6 +1885,12 @@ int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) int res; uint64_t tmp64; + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + if (smu_data->smu7_data.fan_table_start == 0) { phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c index de2a24d85f48..a6619e530fe3 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smc.c @@ -2496,6 +2496,12 @@ int tonga_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) PHM_PlatformCaps_MicrocodeFanControl)) return 0; + if (hwmgr->thermal_controller.fanInfo.bNoFan) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + if (0 == smu_data->smu7_data.fan_table_start) { phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); -- GitLab From f2d3d7f84cdf60d2e7e103a3381910005a4ca669 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 7 Dec 2016 17:22:25 +0800 Subject: [PATCH 0135/1442] drm/amdgpu: fix enable_cp_power_gating in gfx_v8.0. commit eb584241226958d45aa1f07f4f6a6ea9da98b29e upstream. the CP_PG_DISABLE bit was reversed. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2534b4989924..c6fca7e7805f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3994,7 +3994,7 @@ static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) { - WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0); + WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1); } static void gfx_v8_0_init_pg(struct amdgpu_device *adev) -- GitLab From 8dbd6f709e62394fedb4fe75a3e553cbb6cbdbd3 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 7 Dec 2016 17:44:20 +0800 Subject: [PATCH 0136/1442] drm/amdgpu: fix init save/restore list in gfx_v8.0 commit 202e0b227b906cb80a2791f21216a55d9468d61b upstream. set valid data to mmRLC_SRM_INDEX_CNTL_ADDRx/DATAx. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index c6fca7e7805f..a88d365be4c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3947,8 +3947,12 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; data = mmRLC_SRM_INDEX_CNTL_DATA_0; for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { - amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); - amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); + if (unique_indices[i] != 0) { + amdgpu_mm_wreg(adev, temp + i, + unique_indices[i] & 0x3FFFF, false); + amdgpu_mm_wreg(adev, data + i, + unique_indices[i] >> 20, false); + } } kfree(register_list_format); -- GitLab From 66469319c01a1a36c78d0a8535e3dcbc1349bb19 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Thu, 15 Dec 2016 16:12:41 +1100 Subject: [PATCH 0137/1442] drivers/gpu/drm/ast: Fix infinite loop if read fails commit 298360af3dab45659810fdc51aba0c9f4097e4f6 upstream. ast_get_dram_info() configures a window in order to access BMC memory. A BMC register can be configured to disallow this, and if so, causes an infinite loop in the ast driver which renders the system unusable. Fix this by erroring out if an error is detected. On powerpc systems with EEH, this leads to the device being fenced and the system continuing to operate. Signed-off-by: Russell Currey Reviewed-by: Joel Stanley Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161215051241.20815-1-ruscur@russell.cc Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 904beaa932d0..f75c6421db62 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -223,7 +223,8 @@ static int ast_get_dram_info(struct drm_device *dev) ast_write32(ast, 0x10000, 0xfc600309); do { - ; + if (pci_channel_offline(dev->pdev)) + return -EIO; } while (ast_read32(ast, 0x10000) != 0x01); data = ast_read32(ast, 0x10004); @@ -428,7 +429,9 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags) ast_detect_chip(dev, &need_post); if (ast->chip != AST1180) { - ast_get_dram_info(dev); + ret = ast_get_dram_info(dev); + if (ret) + goto out_free; ast->vram_size = ast_get_vram_info(dev); DRM_INFO("dram %d %d %d %08x\n", ast->mclk, ast->dram_type, ast->dram_bus_width, ast->vram_size); } -- GitLab From 0814c3ede27a5c1f7994ccd8998f6b41e802949d Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 24 Nov 2016 13:34:02 +0200 Subject: [PATCH 0138/1442] mei: request async autosuspend at the end of enumeration commit d5f8e166c25750adc147b0adf64a62a91653438a upstream. pm_runtime_autosuspend can take synchronous or asynchronous paths, Because we are calling pm_runtime_mark_last_busy just before this most of the cases it takes the asynchronous way. However, when the FW or driver resets during already running runtime suspend, the call will result in calling to the driver's rpm callback and results in a deadlock on device_lock. The simplest fix is to replace pm_runtime_autosuspend with asynchronous pm_request_autosuspend. Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 6fe02350578d..f999a8d3c9c4 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -686,7 +686,7 @@ void mei_host_client_init(struct mei_device *dev) pm_runtime_mark_last_busy(dev->dev); dev_dbg(dev->dev, "rpm: autosuspend\n"); - pm_runtime_autosuspend(dev->dev); + pm_request_autosuspend(dev->dev); } /** -- GitLab From 09c154920e7738f9a597354432b81d99b9105f5e Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 29 Nov 2016 16:49:27 +0200 Subject: [PATCH 0139/1442] mei: me: add lewisburg device ids commit 9ff2007bea1f1bfc53ac0bc7ccf8200bb275fd52 upstream. Add MEI Lewisburg PCH IDs for Purley based workstations. Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 7ad15d678878..c8307e8b4c16 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -122,6 +122,8 @@ #define MEI_DEV_ID_SPT_H 0xA13A /* Sunrise Point H */ #define MEI_DEV_ID_SPT_H_2 0xA13B /* Sunrise Point H 2 */ +#define MEI_DEV_ID_LBG 0xA1BA /* Lewisburg (SPT) */ + #define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ #define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index f3ffd883b232..f9c6ec4b98ab 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -87,6 +87,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)}, -- GitLab From 11aa5c10102a425dde885b6b056e1fe354bcbb88 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 1 Dec 2016 09:18:28 +0100 Subject: [PATCH 0140/1442] block: protect iterate_bdevs() against concurrent close commit af309226db916e2c6e08d3eba3fa5c34225200c4 upstream. If a block device is closed while iterate_bdevs() is handling it, the following NULL pointer dereference occurs because bdev->b_disk is NULL in bdev_get_queue(), which is called from blk_get_backing_dev_info() (in turn called by the mapping_cap_writeback_dirty() call in __filemap_fdatawrite_range()): BUG: unable to handle kernel NULL pointer dereference at 0000000000000508 IP: [] blk_get_backing_dev_info+0x10/0x20 PGD 9e62067 PUD 9ee8067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 1 PID: 2422 Comm: sync Not tainted 4.5.0-rc7+ #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) task: ffff880009f4d700 ti: ffff880009f5c000 task.ti: ffff880009f5c000 RIP: 0010:[] [] blk_get_backing_dev_info+0x10/0x20 RSP: 0018:ffff880009f5fe68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88000ec17a38 RCX: ffffffff81a4e940 RDX: 7fffffffffffffff RSI: 0000000000000000 RDI: ffff88000ec176c0 RBP: ffff880009f5fe68 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88000ec17860 R13: ffffffff811b25c0 R14: ffff88000ec178e0 R15: ffff88000ec17a38 FS: 00007faee505d700(0000) GS:ffff88000fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000508 CR3: 0000000009e8a000 CR4: 00000000000006e0 Stack: ffff880009f5feb8 ffffffff8112e7f5 0000000000000000 7fffffffffffffff 0000000000000000 0000000000000000 7fffffffffffffff 0000000000000001 ffff88000ec178e0 ffff88000ec17860 ffff880009f5fec8 ffffffff8112e81f Call Trace: [] __filemap_fdatawrite_range+0x85/0x90 [] filemap_fdatawrite+0x1f/0x30 [] fdatawrite_one_bdev+0x16/0x20 [] iterate_bdevs+0xf2/0x130 [] sys_sync+0x63/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x76 Code: 0f 1f 44 00 00 48 8b 87 f0 00 00 00 55 48 89 e5 <48> 8b 80 08 05 00 00 5d RIP [] blk_get_backing_dev_info+0x10/0x20 RSP CR2: 0000000000000508 ---[ end trace 2487336ceb3de62d ]--- The crash is easily reproducible by running the following command, if an msleep(100) is inserted before the call to func() in iterate_devs(): while :; do head -c1 /dev/nullb0; done > /dev/null & while :; do sync; done Fix it by holding the bd_mutex across the func() call and only calling func() if the bdev is opened. Fixes: 5c0d6b60a0ba ("vfs: Create function for iterating over block devices") Reported-and-tested-by: Wei Fang Signed-off-by: Rabin Vincent Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 9166b9f63d33..092a2eed1628 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1950,6 +1950,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) spin_lock(&blockdev_superblock->s_inode_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { struct address_space *mapping = inode->i_mapping; + struct block_device *bdev; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || @@ -1970,8 +1971,12 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) */ iput(old_inode); old_inode = inode; + bdev = I_BDEV(inode); - func(I_BDEV(inode), arg); + mutex_lock(&bdev->bd_mutex); + if (bdev->bd_openers) + func(bdev, arg); + mutex_unlock(&bdev->bd_mutex); spin_lock(&blockdev_superblock->s_inode_list_lock); } -- GitLab From fbb28e76454fd7b9c59e5e35702982ee6d0dae8b Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 16 Nov 2016 00:55:57 +0100 Subject: [PATCH 0141/1442] vt: fix Scroll Lock LED trigger name commit 31b5929d533f5183972cf57a7844b456ed996f3c upstream. There is a disagreement between drivers/tty/vt/keyboard.c and drivers/input/input-leds.c with regard to what is a Scroll Lock LED trigger name: input calls it "kbd-scrolllock", but vt calls it "kbd-scrollock" (two l's). This prevents Scroll Lock LED trigger from binding to this LED by default. Since it is a scroLL Lock LED, this interface was introduced only about a year ago and in an Internet search people seem to reference this trigger only to set it to this LED let's simply rename it to "kbd-scrolllock". Also, it looks like this was supposed to be changed before this code was merged: https://lkml.org/lkml/2015/6/9/697 but it was done only on the input side. Signed-off-by: Maciej S. Szmigiero Acked-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 0f8caae4267d..ece10e6b731b 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -982,7 +982,7 @@ static void kbd_led_trigger_activate(struct led_classdev *cdev) KBD_LED_TRIGGER((_led_bit) + 8, _name) static struct kbd_led_trigger kbd_led_triggers[] = { - KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"), + KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrolllock"), KBD_LED_TRIGGER(VC_NUMLOCK, "kbd-numlock"), KBD_LED_TRIGGER(VC_CAPSLOCK, "kbd-capslock"), KBD_LED_TRIGGER(VC_KANALOCK, "kbd-kanalock"), -- GitLab From 2d2914349286d29e57b6536c01251c1332a8e784 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 18 Nov 2016 14:17:31 +0200 Subject: [PATCH 0142/1442] stm class: Fix device leak in open error path commit a0ebf519b8a2666438d999c62995618c710573e5 upstream. Make sure to drop the reference taken by class_find_device() also on allocation errors in open(). Signed-off-by: Johan Hovold Fixes: 7bd1d4093c2f ("stm class: Introduce an abstraction for...") Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 51f81d64ca37..a6ea387b5b00 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -361,7 +361,7 @@ static int stm_char_open(struct inode *inode, struct file *file) struct stm_file *stmf; struct device *dev; unsigned int major = imajor(inode); - int err = -ENODEV; + int err = -ENOMEM; dev = class_find_device(&stm_class, NULL, &major, major_match); if (!dev) @@ -369,8 +369,9 @@ static int stm_char_open(struct inode *inode, struct file *file) stmf = kzalloc(sizeof(*stmf), GFP_KERNEL); if (!stmf) - return -ENOMEM; + goto err_put_device; + err = -ENODEV; stm_output_init(&stmf->output); stmf->stm = to_stm_device(dev); @@ -382,9 +383,10 @@ static int stm_char_open(struct inode *inode, struct file *file) return nonseekable_open(inode, file); err_free: + kfree(stmf); +err_put_device: /* matches class_find_device() above */ put_device(dev); - kfree(stmf); return err; } -- GitLab From 6089f8712dcbfc2ee20f6d9af2f4550ddf88e7d9 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Fri, 21 Oct 2016 06:33:29 -0700 Subject: [PATCH 0143/1442] scsi: megaraid_sas: For SRIOV enabled firmware, ensure VF driver waits for 30secs before reset commit 18e1c7f68a5814442abad849abe6eacbf02ffd7c upstream. For SRIOV enabled firmware, if there is a OCR(online controller reset) possibility driver set the convert flag to 1, which is not happening if there are outstanding commands even after 180 seconds. As driver does not set convert flag to 1 and still making the OCR to run, VF(Virtual function) driver is directly writing on to the register instead of waiting for 30 seconds. Setting convert flag to 1 will cause VF driver will wait for 30 secs before going for reset. Signed-off-by: Kiran Kumar Kasturi Signed-off-by: Sumit Saxena Reviewed-by: Hannes Reinecke Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 52d8bbf7feb5..61be7ed73a7c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2823,6 +2823,7 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, dev_err(&instance->pdev->dev, "pending commands remain after waiting, " "will reset adapter scsi%d.\n", instance->host->host_no); + *convert = 1; retval = 1; } out: -- GitLab From 113783ec1cdbc208ca10927348cf37b983ea3b34 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Fri, 21 Oct 2016 06:33:35 -0700 Subject: [PATCH 0144/1442] scsi: megaraid_sas: Do not set MPI2_TYPE_CUDA for JBOD FP path for FW which does not support JBOD sequence map commit d5573584429254a14708cf8375c47092b5edaf2c upstream. Signed-off-by: Sumit Saxena Reviewed-by: Hannes Reinecke Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 61be7ed73a7c..bd04bd01d34a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2000,6 +2000,8 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, io_request->DevHandle = pd_sync->seq[pd_index].devHandle; pRAID_Context->regLockFlags |= (MR_RL_FLAGS_SEQ_NUM_ENABLE|MR_RL_FLAGS_GRANT_DESTINATION_CUDA); + pRAID_Context->Type = MPI2_TYPE_CUDA; + pRAID_Context->nseg = 0x1; } else if (fusion->fast_path_io) { pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id); pRAID_Context->configSeqNum = 0; @@ -2035,12 +2037,10 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, pRAID_Context->timeoutValue = cpu_to_le16((os_timeout_value > timeout_limit) ? timeout_limit : os_timeout_value); - if (fusion->adapter_type == INVADER_SERIES) { - pRAID_Context->Type = MPI2_TYPE_CUDA; - pRAID_Context->nseg = 0x1; + if (fusion->adapter_type == INVADER_SERIES) io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH); - } + cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_FP_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); -- GitLab From 8032a30081c1361e06ba70f08f34fd972772060b Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sun, 11 Dec 2016 22:05:56 +0530 Subject: [PATCH 0145/1442] iscsi-target: Return error if unable to add network portal commit 83337e544323a8bd7492994d64af339175ac7107 upstream. If iscsit_tpg_add_network_portal() fails then return error code instead of 0 to user space. If iscsi-target returns 0 then user space keeps on retrying same command infinitely, targetcli or echo hangs till command completes with non zero return value. In some cases it is possible that add network portal command never completes with success even after retrying multiple times, for example - cxgbit_setup_np() always returns -EINVAL if portal IP does not belong to Chelsio adapter interface. Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche [ bvanassche: Added "Fixes:" and "Cc: stable" tags ] Fixes: commit d4b3fa4b0881 ("iscsi-target: Make iscsi_tpg_np driver show/store use generic code") Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_configfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 923c032f0b95..e980e2d0c2db 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -100,8 +100,10 @@ static ssize_t lio_target_np_driver_store(struct config_item *item, tpg_np_new = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, tpg_np, type); - if (IS_ERR(tpg_np_new)) + if (IS_ERR(tpg_np_new)) { + rc = PTR_ERR(tpg_np_new); goto out; + } } else { tpg_np_new = iscsit_tpg_locate_child_np(tpg_np, type); if (tpg_np_new) { -- GitLab From 2ce34d9da4273b10ad8a680a8258278bcf805c9e Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 9 Dec 2016 17:16:31 +0100 Subject: [PATCH 0146/1442] scsi: zfcp: fix use-after-"free" in FC ingress path after TMF commit dac37e15b7d511e026a9313c8c46794c144103cd upstream. When SCSI EH invokes zFCP's callbacks for eh_device_reset_handler() and eh_target_reset_handler(), it expects us to relent the ownership over the given scsi_cmnd and all other scsi_cmnds within the same scope - LUN or target - when returning with SUCCESS from the callback ('release' them). SCSI EH can then reuse those commands. We did not follow this rule to release commands upon SUCCESS; and if later a reply arrived for one of those supposed to be released commands, we would still make use of the scsi_cmnd in our ingress tasklet. This will at least result in undefined behavior or a kernel panic because of a wrong kernel pointer dereference. To fix this, we NULLify all pointers to scsi_cmnds (struct zfcp_fsf_req *)->data in the matching scope if a TMF was successful. This is done under the locks (struct zfcp_adapter *)->abort_lock and (struct zfcp_reqlist *)->lock to prevent the requests from being removed from the request-hashtable, and the ingress tasklet from making use of the scsi_cmnd-pointer in zfcp_fsf_fcp_cmnd_handler(). For cases where a reply arrives during SCSI EH, but before we get a chance to NULLify the pointer - but before we return from the callback -, we assume that the code is protected from races via the CAS operation in blk_complete_request() that is called in scsi_done(). The following stacktrace shows an example for a crash resulting from the previous behavior: Unable to handle kernel pointer dereference at virtual kernel address fffffee17a672000 Oops: 0038 [#1] SMP CPU: 2 PID: 0 Comm: swapper/2 Not tainted task: 00000003f7ff5be0 ti: 00000003f3d38000 task.ti: 00000003f3d38000 Krnl PSW : 0404d00180000000 00000000001156b0 (smp_vcpu_scheduled+0x18/0x40) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 EA:3 Krnl GPRS: 000000200000007e 0000000000000000 fffffee17a671fd8 0000000300000015 ffffffff80000000 00000000005dfde8 07000003f7f80e00 000000004fa4e800 000000036ce8d8f8 000000036ce8d9c0 00000003ece8fe00 ffffffff969c9e93 00000003fffffffd 000000036ce8da10 00000000003bf134 00000003f3b07918 Krnl Code: 00000000001156a2: a7190000 lghi %r1,0 00000000001156a6: a7380015 lhi %r3,21 #00000000001156aa: e32050000008 ag %r2,0(%r5) >00000000001156b0: 482022b0 lh %r2,688(%r2) 00000000001156b4: ae123000 sigp %r1,%r2,0(%r3) 00000000001156b8: b2220020 ipm %r2 00000000001156bc: 8820001c srl %r2,28 00000000001156c0: c02700000001 xilf %r2,1 Call Trace: ([<0000000000000000>] 0x0) [<000003ff807bdb8e>] zfcp_fsf_fcp_cmnd_handler+0x3de/0x490 [zfcp] [<000003ff807be30a>] zfcp_fsf_req_complete+0x252/0x800 [zfcp] [<000003ff807c0a48>] zfcp_fsf_reqid_check+0xe8/0x190 [zfcp] [<000003ff807c194e>] zfcp_qdio_int_resp+0x66/0x188 [zfcp] [<000003ff80440c64>] qdio_kick_handler+0xdc/0x310 [qdio] [<000003ff804463d0>] __tiqdio_inbound_processing+0xf8/0xcd8 [qdio] [<0000000000141fd4>] tasklet_action+0x9c/0x170 [<0000000000141550>] __do_softirq+0xe8/0x258 [<000000000010ce0a>] do_softirq+0xba/0xc0 [<000000000014187c>] irq_exit+0xc4/0xe8 [<000000000046b526>] do_IRQ+0x146/0x1d8 [<00000000005d6a3c>] io_return+0x0/0x8 [<00000000005d6422>] vtime_stop_cpu+0x4a/0xa0 ([<0000000000000000>] 0x0) [<0000000000103d8a>] arch_cpu_idle+0xa2/0xb0 [<0000000000197f94>] cpu_startup_entry+0x13c/0x1f8 [<0000000000114782>] smp_start_secondary+0xda/0xe8 [<00000000005d6efe>] restart_int_handler+0x56/0x6c [<0000000000000000>] 0x0 Last Breaking-Event-Address: [<00000000003bf12e>] arch_spin_lock_wait+0x56/0xb0 Suggested-by: Steffen Maier Signed-off-by: Benjamin Block Fixes: ea127f9754 ("[PATCH] s390 (7/7): zfcp host adapter.") (tglx/history.git) Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.h | 11 ++++++ drivers/s390/scsi/zfcp_reqlist.h | 30 ++++++++++++++++- drivers/s390/scsi/zfcp_scsi.c | 57 ++++++++++++++++++++++++++++++-- 3 files changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 36d07584271d..2d06b5d2c05b 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -388,4 +388,15 @@ void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) _zfcp_dbf_scsi(tmp_tag, 1, scmnd, NULL); } +/** + * zfcp_dbf_scsi_nullcmnd() - trace NULLify of SCSI command in dev/tgt-reset. + * @scmnd: SCSI command that was NULLified. + * @fsf_req: request that owned @scmnd. + */ +static inline void zfcp_dbf_scsi_nullcmnd(struct scsi_cmnd *scmnd, + struct zfcp_fsf_req *fsf_req) +{ + _zfcp_dbf_scsi("scfc__1", 3, scmnd, fsf_req); +} + #endif /* ZFCP_DBF_H */ diff --git a/drivers/s390/scsi/zfcp_reqlist.h b/drivers/s390/scsi/zfcp_reqlist.h index 7c2c6194dfca..703fce59befe 100644 --- a/drivers/s390/scsi/zfcp_reqlist.h +++ b/drivers/s390/scsi/zfcp_reqlist.h @@ -4,7 +4,7 @@ * Data structure and helper functions for tracking pending FSF * requests. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009, 2016 */ #ifndef ZFCP_REQLIST_H @@ -180,4 +180,32 @@ static inline void zfcp_reqlist_move(struct zfcp_reqlist *rl, spin_unlock_irqrestore(&rl->lock, flags); } +/** + * zfcp_reqlist_apply_for_all() - apply a function to every request. + * @rl: the requestlist that contains the target requests. + * @f: the function to apply to each request; the first parameter of the + * function will be the target-request; the second parameter is the same + * pointer as given with the argument @data. + * @data: freely chosen argument; passed through to @f as second parameter. + * + * Uses :c:macro:`list_for_each_entry` to iterate over the lists in the hash- + * table (not a 'safe' variant, so don't modify the list). + * + * Holds @rl->lock over the entire request-iteration. + */ +static inline void +zfcp_reqlist_apply_for_all(struct zfcp_reqlist *rl, + void (*f)(struct zfcp_fsf_req *, void *), void *data) +{ + struct zfcp_fsf_req *req; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&rl->lock, flags); + for (i = 0; i < ZFCP_REQ_LIST_BUCKETS; i++) + list_for_each_entry(req, &rl->buckets[i], list) + f(req, data); + spin_unlock_irqrestore(&rl->lock, flags); +} + #endif /* ZFCP_REQLIST_H */ diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 9069f98a1817..11cd18c134c1 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -3,7 +3,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #define KMSG_COMPONENT "zfcp" @@ -209,6 +209,57 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) return retval; } +struct zfcp_scsi_req_filter { + u8 tmf_scope; + u32 lun_handle; + u32 port_handle; +}; + +static void zfcp_scsi_forget_cmnd(struct zfcp_fsf_req *old_req, void *data) +{ + struct zfcp_scsi_req_filter *filter = + (struct zfcp_scsi_req_filter *)data; + + /* already aborted - prevent side-effects - or not a SCSI command */ + if (old_req->data == NULL || old_req->fsf_command != FSF_QTCB_FCP_CMND) + return; + + /* (tmf_scope == FCP_TMF_TGT_RESET || tmf_scope == FCP_TMF_LUN_RESET) */ + if (old_req->qtcb->header.port_handle != filter->port_handle) + return; + + if (filter->tmf_scope == FCP_TMF_LUN_RESET && + old_req->qtcb->header.lun_handle != filter->lun_handle) + return; + + zfcp_dbf_scsi_nullcmnd((struct scsi_cmnd *)old_req->data, old_req); + old_req->data = NULL; +} + +static void zfcp_scsi_forget_cmnds(struct zfcp_scsi_dev *zsdev, u8 tm_flags) +{ + struct zfcp_adapter *adapter = zsdev->port->adapter; + struct zfcp_scsi_req_filter filter = { + .tmf_scope = FCP_TMF_TGT_RESET, + .port_handle = zsdev->port->handle, + }; + unsigned long flags; + + if (tm_flags == FCP_TMF_LUN_RESET) { + filter.tmf_scope = FCP_TMF_LUN_RESET; + filter.lun_handle = zsdev->lun_handle; + } + + /* + * abort_lock secures against other processings - in the abort-function + * and normal cmnd-handler - of (struct zfcp_fsf_req *)->data + */ + write_lock_irqsave(&adapter->abort_lock, flags); + zfcp_reqlist_apply_for_all(adapter->req_list, zfcp_scsi_forget_cmnd, + &filter); + write_unlock_irqrestore(&adapter->abort_lock, flags); +} + static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) { struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(scpnt->device); @@ -241,8 +292,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags); retval = FAILED; - } else + } else { zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags); + zfcp_scsi_forget_cmnds(zfcp_sdev, tm_flags); + } zfcp_fsf_req_free(fsf_req); return retval; -- GitLab From 744807cb251f331ca0720b8c81a54e9db7b13ac5 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 9 Dec 2016 17:16:32 +0100 Subject: [PATCH 0147/1442] scsi: zfcp: do not trace pure benign residual HBA responses at default level commit 56d23ed7adf3974f10e91b643bd230e9c65b5f79 upstream. Since quite a while, Linux issues enough SCSI commands per scsi_device which successfully return with FCP_RESID_UNDER, FSF_FCP_RSP_AVAILABLE, and SAM_STAT_GOOD. This floods the HBA trace area and we cannot see other and important HBA trace records long enough. Therefore, do not trace HBA response errors for pure benign residual under counts at the default trace level. This excludes benign residual under count combined with other validity bits set in FCP_RSP_IU, such as FCP_SNS_LEN_VAL. For all those other cases, we still do want to see both the HBA record and the corresponding SCSI record by default. Signed-off-by: Steffen Maier Fixes: a54ca0f62f95 ("[SCSI] zfcp: Redesign of the debug tracing for HBA records.") Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.h | 30 ++++++++++++++++++++++++++++-- drivers/s390/scsi/zfcp_fsf.h | 3 ++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 2d06b5d2c05b..db186d44cfaf 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -2,7 +2,7 @@ * zfcp device driver * debug feature declarations * - * Copyright IBM Corp. 2008, 2015 + * Copyright IBM Corp. 2008, 2016 */ #ifndef ZFCP_DBF_H @@ -283,6 +283,30 @@ struct zfcp_dbf { struct zfcp_dbf_scsi scsi_buf; }; +/** + * zfcp_dbf_hba_fsf_resp_suppress - true if we should not trace by default + * @req: request that has been completed + * + * Returns true if FCP response with only benign residual under count. + */ +static inline +bool zfcp_dbf_hba_fsf_resp_suppress(struct zfcp_fsf_req *req) +{ + struct fsf_qtcb *qtcb = req->qtcb; + u32 fsf_stat = qtcb->header.fsf_status; + struct fcp_resp *fcp_rsp; + u8 rsp_flags, fr_status; + + if (qtcb->prefix.qtcb_type != FSF_IO_COMMAND) + return false; /* not an FCP response */ + fcp_rsp = (struct fcp_resp *)&qtcb->bottom.io.fcp_rsp; + rsp_flags = fcp_rsp->fr_flags; + fr_status = fcp_rsp->fr_status; + return (fsf_stat == FSF_FCP_RSP_AVAILABLE) && + (rsp_flags == FCP_RESID_UNDER) && + (fr_status == SAM_STAT_GOOD); +} + static inline void zfcp_dbf_hba_fsf_resp(char *tag, int level, struct zfcp_fsf_req *req) { @@ -304,7 +328,9 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req) zfcp_dbf_hba_fsf_resp("fs_perr", 1, req); } else if (qtcb->header.fsf_status != FSF_GOOD) { - zfcp_dbf_hba_fsf_resp("fs_ferr", 1, req); + zfcp_dbf_hba_fsf_resp("fs_ferr", + zfcp_dbf_hba_fsf_resp_suppress(req) + ? 5 : 1, req); } else if ((req->fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) || (req->fsf_command == FSF_QTCB_OPEN_LUN)) { diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index be1c04b334c5..ea3c76ac0de1 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -3,7 +3,7 @@ * * Interface to the FSF support functions. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #ifndef FSF_H @@ -78,6 +78,7 @@ #define FSF_APP_TAG_CHECK_FAILURE 0x00000082 #define FSF_REF_TAG_CHECK_FAILURE 0x00000083 #define FSF_ADAPTER_STATUS_AVAILABLE 0x000000AD +#define FSF_FCP_RSP_AVAILABLE 0x000000AF #define FSF_UNKNOWN_COMMAND 0x000000E2 #define FSF_UNKNOWN_OP_SUBTYPE 0x000000E3 #define FSF_INVALID_COMMAND_OPTION 0x000000E5 -- GitLab From 2a940b853ef679cd981e38f23c756d4335a3375c Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 9 Dec 2016 17:16:33 +0100 Subject: [PATCH 0148/1442] scsi: zfcp: fix rport unblock race with LUN recovery commit 6f2ce1c6af37191640ee3ff6e8fc39ea10352f4c upstream. It is unavoidable that zfcp_scsi_queuecommand() has to finish requests with DID_IMM_RETRY (like fc_remote_port_chkready()) during the time window when zfcp detected an unavailable rport but fc_remote_port_delete(), which is asynchronous via zfcp_scsi_schedule_rport_block(), has not yet blocked the rport. However, for the case when the rport becomes available again, we should prevent unblocking the rport too early. In contrast to other FCP LLDDs, zfcp has to open each LUN with the FCP channel hardware before it can send I/O to a LUN. So if a port already has LUNs attached and we unblock the rport just after port recovery, recoveries of LUNs behind this port can still be pending which in turn force zfcp_scsi_queuecommand() to unnecessarily finish requests with DID_IMM_RETRY. This also opens a time window with unblocked rport (until the followup LUN reopen recovery has finished). If a scsi_cmnd timeout occurs during this time window fc_timed_out() cannot work as desired and such command would indeed time out and trigger scsi_eh. This prevents a clean and timely path failover. This should not happen if the path issue can be recovered on FC transport layer such as path issues involving RSCNs. Fix this by only calling zfcp_scsi_schedule_rport_register(), to asynchronously trigger fc_remote_port_add(), after all LUN recoveries as children of the rport have finished and no new recoveries of equal or higher order were triggered meanwhile. Finished intentionally includes any recovery result no matter if successful or failed (still unblock rport so other successful LUNs work). For simplicity, we check after each finished LUN recovery if there is another LUN recovery pending on the same port and then do nothing. We handle the special case of a successful recovery of a port without LUN children the same way without changing this case's semantics. For debugging we introduce 2 new trace records written if the rport unblock attempt was aborted due to still unfinished or freshly triggered recovery. The records are only written above the default trace level. Benjamin noticed the important special case of new recovery that can be triggered between having given up the erp_lock and before calling zfcp_erp_action_cleanup() within zfcp_erp_strategy(). We must avoid the following sequence: ERP thread rport_work other context ------------------------- -------------- -------------------------------- port is unblocked, rport still blocked, due to pending/running ERP action, so ((port->status & ...UNBLOCK) != 0) and (port->rport == NULL) unlock ERP zfcp_erp_action_cleanup() case ZFCP_ERP_ACTION_REOPEN_LUN: zfcp_erp_try_rport_unblock() ((status & ...UNBLOCK) != 0) [OLD!] zfcp_erp_port_reopen() lock ERP zfcp_erp_port_block() port->status clear ...UNBLOCK unlock ERP zfcp_scsi_schedule_rport_block() port->rport_task = RPORT_DEL queue_work(rport_work) zfcp_scsi_rport_work() (port->rport_task != RPORT_ADD) port->rport_task = RPORT_NONE zfcp_scsi_rport_block() if (!port->rport) return zfcp_scsi_schedule_rport_register() port->rport_task = RPORT_ADD queue_work(rport_work) zfcp_scsi_rport_work() (port->rport_task == RPORT_ADD) port->rport_task = RPORT_NONE zfcp_scsi_rport_register() (port->rport == NULL) rport = fc_remote_port_add() port->rport = rport; Now the rport was erroneously unblocked while the zfcp_port is blocked. This is another situation we want to avoid due to scsi_eh potential. This state would at least remain until the new recovery from the other context finished successfully, or potentially forever if it failed. In order to close this race, we take the erp_lock inside zfcp_erp_try_rport_unblock() when checking the status of zfcp_port or LUN. With that, the possible corresponding rport state sequences would be: (unblock[ERP thread],block[other context]) if the ERP thread gets erp_lock first and still sees ((port->status & ...UNBLOCK) != 0), (block[other context],NOP[ERP thread]) if the ERP thread gets erp_lock after the other context has already cleard ...UNBLOCK from port->status. Since checking fields of struct erp_action is unsafe because they could have been overwritten (re-used for new recovery) meanwhile, we only check status of zfcp_port and LUN since these are only changed under erp_lock elsewhere. Regarding the check of the proper status flags (port or port_forced are similar to the shown adapter recovery): [zfcp_erp_adapter_shutdown()] zfcp_erp_adapter_reopen() zfcp_erp_adapter_block() * clear UNBLOCK ---------------------------------------+ zfcp_scsi_schedule_rports_block() | write_lock_irqsave(&adapter->erp_lock, flags);-------+ | zfcp_erp_action_enqueue() | | zfcp_erp_setup_act() | | * set ERP_INUSE -----------------------------------|--|--+ write_unlock_irqrestore(&adapter->erp_lock, flags);--+ | | .context-switch. | | zfcp_erp_thread() | | zfcp_erp_strategy() | | write_lock_irqsave(&adapter->erp_lock, flags);------+ | | ... | | | zfcp_erp_strategy_check_target() | | | zfcp_erp_strategy_check_adapter() | | | zfcp_erp_adapter_unblock() | | | * set UNBLOCK -----------------------------------|--+ | zfcp_erp_action_dequeue() | | * clear ERP_INUSE ---------------------------------|-----+ ... | write_unlock_irqrestore(&adapter->erp_lock, flags);-+ Hence, we should check for both UNBLOCK and ERP_INUSE because they are interleaved. Also we need to explicitly check ERP_FAILED for the link down case which currently does not clear the UNBLOCK flag in zfcp_fsf_link_down_info_eval(). Signed-off-by: Steffen Maier Fixes: 8830271c4819 ("[SCSI] zfcp: Dont fail SCSI commands when transitioning to blocked fc_rport") Fixes: a2fa0aede07c ("[SCSI] zfcp: Block FC transport rports early on errors") Fixes: 5f852be9e11d ("[SCSI] zfcp: Fix deadlock between zfcp ERP and SCSI") Fixes: 338151e06608 ("[SCSI] zfcp: make use of fc_remote_port_delete when target port is unavailable") Fixes: 3859f6a248cb ("[PATCH] zfcp: add rports to enable scsi_add_device to work again") Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.c | 17 ++++++++-- drivers/s390/scsi/zfcp_erp.c | 61 +++++++++++++++++++++++++++++++++-- drivers/s390/scsi/zfcp_ext.h | 4 ++- drivers/s390/scsi/zfcp_scsi.c | 4 +-- 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 581001989937..d5bf36ec8a75 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -289,11 +289,12 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter, /** - * zfcp_dbf_rec_run - trace event related to running recovery + * zfcp_dbf_rec_run_lvl - trace event related to running recovery + * @level: trace level to be used for event * @tag: identifier for event * @erp: erp_action running */ -void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) +void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp) { struct zfcp_dbf *dbf = erp->adapter->dbf; struct zfcp_dbf_rec *rec = &dbf->rec_buf; @@ -319,10 +320,20 @@ void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) else rec->u.run.rec_count = atomic_read(&erp->adapter->erp_counter); - debug_event(dbf->rec, 1, rec, sizeof(*rec)); + debug_event(dbf->rec, level, rec, sizeof(*rec)); spin_unlock_irqrestore(&dbf->rec_lock, flags); } +/** + * zfcp_dbf_rec_run - trace event related to running recovery + * @tag: identifier for event + * @erp: erp_action running + */ +void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) +{ + zfcp_dbf_rec_run_lvl(1, tag, erp); +} + /** * zfcp_dbf_rec_run_wka - trace wka port event with info like running recovery * @tag: identifier for event diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index a59d678125bd..7ccfce559034 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -3,7 +3,7 @@ * * Error Recovery Procedures (ERP). * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #define KMSG_COMPONENT "zfcp" @@ -1204,6 +1204,62 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action) } } +/** + * zfcp_erp_try_rport_unblock - unblock rport if no more/new recovery + * @port: zfcp_port whose fc_rport we should try to unblock + */ +static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) +{ + unsigned long flags; + struct zfcp_adapter *adapter = port->adapter; + int port_status; + struct Scsi_Host *shost = adapter->scsi_host; + struct scsi_device *sdev; + + write_lock_irqsave(&adapter->erp_lock, flags); + port_status = atomic_read(&port->status); + if ((port_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 || + (port_status & (ZFCP_STATUS_COMMON_ERP_INUSE | + ZFCP_STATUS_COMMON_ERP_FAILED)) != 0) { + /* new ERP of severity >= port triggered elsewhere meanwhile or + * local link down (adapter erp_failed but not clear unblock) + */ + zfcp_dbf_rec_run_lvl(4, "ertru_p", &port->erp_action); + write_unlock_irqrestore(&adapter->erp_lock, flags); + return; + } + spin_lock(shost->host_lock); + __shost_for_each_device(sdev, shost) { + struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); + int lun_status; + + if (zsdev->port != port) + continue; + /* LUN under port of interest */ + lun_status = atomic_read(&zsdev->status); + if ((lun_status & ZFCP_STATUS_COMMON_ERP_FAILED) != 0) + continue; /* unblock rport despite failed LUNs */ + /* LUN recovery not given up yet [maybe follow-up pending] */ + if ((lun_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 || + (lun_status & ZFCP_STATUS_COMMON_ERP_INUSE) != 0) { + /* LUN blocked: + * not yet unblocked [LUN recovery pending] + * or meanwhile blocked [new LUN recovery triggered] + */ + zfcp_dbf_rec_run_lvl(4, "ertru_l", &zsdev->erp_action); + spin_unlock(shost->host_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); + return; + } + } + /* now port has no child or all children have completed recovery, + * and no ERP of severity >= port was meanwhile triggered elsewhere + */ + zfcp_scsi_schedule_rport_register(port); + spin_unlock(shost->host_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) { struct zfcp_adapter *adapter = act->adapter; @@ -1214,6 +1270,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) case ZFCP_ERP_ACTION_REOPEN_LUN: if (!(act->status & ZFCP_STATUS_ERP_NO_REF)) scsi_device_put(sdev); + zfcp_erp_try_rport_unblock(port); break; case ZFCP_ERP_ACTION_REOPEN_PORT: @@ -1224,7 +1281,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) */ if (act->step != ZFCP_ERP_STEP_UNINITIALIZED) if (result == ZFCP_ERP_SUCCEEDED) - zfcp_scsi_schedule_rport_register(port); + zfcp_erp_try_rport_unblock(port); /* fall through */ case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: put_device(&port->dev); diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index c8fed9fa1cca..21c8c689b02b 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -3,7 +3,7 @@ * * External function declarations. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #ifndef ZFCP_EXT_H @@ -35,6 +35,8 @@ extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *); extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *, struct zfcp_port *, struct scsi_device *, u8, u8); extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *); +extern void zfcp_dbf_rec_run_lvl(int level, char *tag, + struct zfcp_erp_action *erp); extern void zfcp_dbf_rec_run_wka(char *, struct zfcp_fc_wka_port *, u64); extern void zfcp_dbf_hba_fsf_uss(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_hba_fsf_res(char *, int, struct zfcp_fsf_req *); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 11cd18c134c1..07ffdbb5107f 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -88,9 +88,7 @@ int zfcp_scsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scpnt) } if (unlikely(!(status & ZFCP_STATUS_COMMON_UNBLOCKED))) { - /* This could be either - * open LUN pending: this is temporary, will result in - * open LUN or ERP_FAILED, so retry command + /* This could be * call to rport_delete pending: mimic retry from * fc_remote_port_chkready until rport is BLOCKED */ -- GitLab From 9d33a399566771b023a08f490344b70a200c87da Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 13 Dec 2016 09:25:21 +0800 Subject: [PATCH 0149/1442] scsi: avoid a permanent stop of the scsi device's request queue commit d2a145252c52792bc59e4767b486b26c430af4bb upstream. A race between scanning and fc_remote_port_delete() may result in a permanent stop if the device gets blocked before scsi_sysfs_add_sdev() and unblocked after. The reason is that blocking a device sets both the SDEV_BLOCKED state and the QUEUE_FLAG_STOPPED. However, scsi_sysfs_add_sdev() unconditionally sets SDEV_RUNNING which causes the device to be ignored by scsi_target_unblock() and thus never have its QUEUE_FLAG_STOPPED cleared leading to a device which is apparently running but has a stopped queue. We actually have two places where SDEV_RUNNING is set: once in scsi_add_lun() which respects the blocked flag and once in scsi_sysfs_add_sdev() which doesn't. Since the second set is entirely spurious, simply remove it to fix the problem. Reported-by: Zengxi Chen Signed-off-by: Wei Fang Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_sysfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 07349270535d..82dfe07b1d47 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1204,10 +1204,6 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) struct request_queue *rq = sdev->request_queue; struct scsi_target *starget = sdev->sdev_target; - error = scsi_device_set_state(sdev, SDEV_RUNNING); - if (error) - return error; - error = scsi_target_add(starget); if (error) return error; -- GitLab From ce7ec3d7526cb26307eb5fee516b2873a88ca3ef Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 19 Dec 2016 11:38:38 -0800 Subject: [PATCH 0150/1442] ARC: mm: arc700: Don't assume 2 colours for aliasing VIPT dcache commit 08fe007968b2b45e831daf74899f79a54d73f773 upstream. An ARC700 customer reported linux boot crashes when upgrading to bigger L1 dcache (64K from 32K). Turns out they had an aliasing VIPT config and current code only assumed 2 colours, while theirs had 4. So default to 4 colours and complain if there are fewer. Ideally this needs to be a Kconfig option, but heck that's too much of hassle for a single user. Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/cacheflush.h | 6 ++++-- arch/arc/mm/cache.c | 13 +++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index a093adbdb017..fc662f49c55a 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -85,6 +85,10 @@ void flush_anon_page(struct vm_area_struct *vma, */ #define PG_dc_clean PG_arch_1 +#define CACHE_COLORS_NUM 4 +#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1) +#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK) + /* * Simple wrapper over config option * Bootup code ensures that hardware matches kernel configuration @@ -94,8 +98,6 @@ static inline int cache_is_vipt_aliasing(void) return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); } -#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & 1) - /* * checks if two addresses (after page aligning) index into same cache set */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 50d71695cd4e..8147583c4434 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -979,11 +979,16 @@ void arc_cache_init(void) /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ if (is_isa_arcompact()) { int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) + int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE); + + if (dc->alias) { + if (!handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + if (CACHE_COLORS_NUM != num_colors) + panic("CACHE_COLORS_NUM not optimized for config\n"); + } else if (!dc->alias && handled) { panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } } } -- GitLab From b3854cefe3d24af43fb8abb3eda09612899bb604 Mon Sep 17 00:00:00 2001 From: Yves-Alexis Perez Date: Fri, 11 Nov 2016 11:28:40 -0800 Subject: [PATCH 0151/1442] firmware: fix usermode helper fallback loading commit 2e700f8d85975f516ccaad821278c1fe66b2cc98 upstream. When you use the firmware usermode helper fallback with a timeout value set to a value greater than INT_MAX (2147483647) a cast overflow issue causes the timeout value to go negative and breaks all usermode helper loading. This regression was introduced through commit 68ff2a00dbf5 ("firmware_loader: handle timeout via wait_for_completion_interruptible_timeout()") on kernel v4.0. The firmware_class drivers relies on the firmware usermode helper fallback as a mechanism to look for firmware if the direct filesystem search failed only if: a) You've enabled CONFIG_FW_LOADER_USER_HELPER_FALLBACK (not many distros): Then all of these callers will rely on the fallback mechanism in case the firmware is not found through an initial direct filesystem lookup: o request_firmware() o request_firmware_into_buf() o request_firmware_nowait() b) If you've only enabled CONFIG_FW_LOADER_USER_HELPER (most distros): Then only callers using request_firmware_nowait() with the second argument set to false, this explicitly is requesting the UMH firmware fallback to be relied on in case the first filesystem lookup fails. Using Coccinelle SmPL grammar we have identified only two drivers explicitly requesting the UMH firmware fallback mechanism: - drivers/firmware/dell_rbu.c - drivers/leds/leds-lp55xx-common.c Since most distributions only enable CONFIG_FW_LOADER_USER_HELPER the biggest impact of this regression are users of the dell_rbu and leds-lp55xx-common device driver which required the UMH to find their respective needed firmwares. The default timeout for the UMH is set to 60 seconds always, as of commit 68ff2a00dbf5 ("firmware_loader: handle timeout via wait_for_completion_interruptible_timeout()") the timeout was bumped to MAX_JIFFY_OFFSET ((LONG_MAX >> 1)-1). Additionally the MAX_JIFFY_OFFSET value was also used if the timeout was configured by a user to 0. The following works: echo 2147483647 > /sys/class/firmware/timeout But both of the following set the timeout to MAX_JIFFY_OFFSET even if we display 0 back to userspace: echo 2147483648 > /sys/class/firmware/timeout cat /sys/class/firmware/timeout 0 echo 0> /sys/class/firmware/timeout cat /sys/class/firmware/timeout 0 A max value of INT_MAX (2147483647) seconds is therefore implicit due to the another cast with simple_strtol(). This fixes the secondary cast (the first one is simple_strtol() but its an issue only by forcing an implicit limit) by re-using the timeout variable and only setting retval in appropriate cases. Lastly worth noting systemd had ripped out the UMH firmware fallback mechanism from udev since udev 2014 via commit be2ea723b1d023b3d ("udev: remove userspace firmware loading support"), so as of systemd v217. Signed-off-by: Yves-Alexis Perez Fixes: 68ff2a00dbf5 "firmware_loader: handle timeout via wait_for_completion_interruptible_timeout()" Cc: Luis R. Rodriguez Cc: Ming Lei Cc: Bjorn Andersson Cc: Greg Kroah-Hartman Acked-by: Luis R. Rodriguez Reviewed-by: Bjorn Andersson [mcgrof@kernel.org: gave commit log a whole lot of love] Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 22d1760a4278..a95e1e572697 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -955,13 +955,14 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, timeout = MAX_JIFFY_OFFSET; } - retval = wait_for_completion_interruptible_timeout(&buf->completion, + timeout = wait_for_completion_interruptible_timeout(&buf->completion, timeout); - if (retval == -ERESTARTSYS || !retval) { + if (timeout == -ERESTARTSYS || !timeout) { + retval = timeout; mutex_lock(&fw_lock); fw_load_abort(fw_priv); mutex_unlock(&fw_lock); - } else if (retval > 0) { + } else if (timeout > 0) { retval = 0; } -- GitLab From 53bbee3348d88b9f68946e249a67beaff82aee6a Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 21 Nov 2016 12:13:58 +0100 Subject: [PATCH 0152/1442] s390/vmlogrdr: fix IUCV buffer allocation commit 5457e03de918f7a3e294eb9d26a608ab8a579976 upstream. The buffer for iucv_message_receive() needs to be below 2 GB. In __iucv_message_receive(), the buffer address is casted to an u32, which would result in either memory corruption or an addressing exception when using addresses >= 2 GB. Fix this by using GFP_DMA for the buffer allocation. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/char/vmlogrdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index e883063c7258..3167e8581994 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -870,7 +870,7 @@ static int __init vmlogrdr_init(void) goto cleanup; for (i=0; i < MAXMINOR; ++i ) { - sys_ser[i].buffer = (char *) get_zeroed_page(GFP_KERNEL); + sys_ser[i].buffer = (char *) get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!sys_ser[i].buffer) { rc = -ENOMEM; break; -- GitLab From 111e0ccaf979c688ca8b56e96561e7207e971b6a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Nov 2016 11:40:27 +0100 Subject: [PATCH 0153/1442] s390/kexec: use node 0 when re-adding crash kernel memory commit 9f88eb4df728aebcd2ddd154d99f1d75b428b897 upstream. When re-adding crash kernel memory within setup_resources() the function memblock_add() is used. That function will add memory by default to node "MAX_NUMNODES" instead of node 0, like the memory detection code does. In case of !NUMA this will trigger this warning when the kernel generates the vmemmap: Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead WARNING: CPU: 0 PID: 0 at mm/memblock.c:1261 memblock_virt_alloc_internal+0x76/0x220 CPU: 0 PID: 0 Comm: swapper Not tainted 4.9.0-rc6 #16 Call Trace: [<0000000000d0b2e8>] memblock_virt_alloc_try_nid+0x88/0xc8 [<000000000083c8ea>] __earlyonly_bootmem_alloc.constprop.1+0x42/0x50 [<000000000083e7f4>] vmemmap_populate+0x1ac/0x1e0 [<0000000000840136>] sparse_mem_map_populate+0x46/0x68 [<0000000000d0c59c>] sparse_init+0x184/0x238 [<0000000000cf45f6>] paging_init+0xbe/0xf8 [<0000000000cf1d4a>] setup_arch+0xa02/0xae0 [<0000000000ced75a>] start_kernel+0x72/0x450 [<0000000000100020>] _stext+0x20/0x80 If NUMA is selected numa_setup_memory() will fix the node assignments before the vmemmap will be populated; so this warning will only appear if NUMA is not selected. To fix this simply use memblock_add_node() and re-add crash kernel memory explicitly to node 0. Reported-and-tested-by: Christian Borntraeger Fixes: 4e042af463f8 ("s390/kexec: fix crash on resize of reserved memory") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7f7ba5f23f13..d027f2eb3559 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -445,7 +445,7 @@ static void __init setup_resources(void) * part of the System RAM resource. */ if (crashk_res.end) { - memblock_add(crashk_res.start, resource_size(&crashk_res)); + memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0); memblock_reserve(crashk_res.start, resource_size(&crashk_res)); insert_resource(&iomem_resource, &crashk_res); } -- GitLab From 959e95305f1316475dbbfa5b366d82d5ac65562b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Dec 2016 14:34:22 +0000 Subject: [PATCH 0154/1442] arm64: KVM: pmu: Reset PMSELR_EL0.SEL to a sane value before entering the guest commit 21cbe3cc8a48ff17059912e019fbde28ed54745a upstream. The ARMv8 architecture allows the cycle counter to be configured by setting PMSELR_EL0.SEL==0x1f and then accessing PMXEVTYPER_EL0, hence accessing PMCCFILTR_EL0. But it disallows the use of PMSELR_EL0.SEL==0x1f to access the cycle counter itself through PMXEVCNTR_EL0. Linux itself doesn't violate this rule, but we may end up with PMSELR_EL0.SEL being set to 0x1f when we enter a guest. If that guest accesses PMXEVCNTR_EL0, the access may UNDEF at EL1, despite the guest not having done anything wrong. In order to avoid this unfortunate course of events (haha!), let's sanitize PMSELR_EL0 on guest entry. This ensures that the guest won't explode unexpectedly. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/hyp/switch.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 83037cd62d01..0c848c18ca44 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -85,7 +85,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(val, hcr_el2); /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); - /* Make sure we trap PMU access from EL0 to EL2 */ + /* + * Make sure we trap PMU access from EL0 to EL2. Also sanitize + * PMSELR_EL0 to make sure it never contains the cycle + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at + * EL1 instead of being trapped to EL2. + */ + write_sysreg(0, pmselr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); __activate_traps_arch()(); -- GitLab From 7d5ec9eb3eea8e6efc6a5795e70bd4057adaf8a9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Dec 2016 12:59:31 -0800 Subject: [PATCH 0155/1442] latent_entropy: fix ARM build error on earlier gcc commit 9988f4d577f42f43b7612d755477585f35424af7 upstream. This fixes build errors seen on gcc-4.9.3 or gcc-5.3.1 for an ARM: arm-soc/init/initramfs.c: In function 'error': arm-soc/init/initramfs.c:50:1: error: unrecognizable insn: } ^ (insn 26 25 27 5 (set (reg:SI 111 [ local_entropy.243 ]) (rotatert:SI (reg:SI 116 [ local_entropy.243 ]) (const_int -30 [0xffffffffffffffe2]))) -1 (nil)) Patch from PaX Team Reported-by: Arnd Bergmann Reported-by: Brad Spengler Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/latent_entropy_plugin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 8160f1c1b56e..dff390f692a2 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -328,9 +328,9 @@ static enum tree_code get_op(tree *rhs) op = LROTATE_EXPR; /* * This code limits the value of random_const to - * the size of a wide int for the rotation + * the size of a long for the rotation */ - random_const &= HOST_BITS_PER_WIDE_INT - 1; + random_const %= TYPE_PRECISION(long_unsigned_type_node); break; } -- GitLab From a0357979d62049bd029e4b6efbaedbf4b0ea15ff Mon Sep 17 00:00:00 2001 From: Josh Cartwright Date: Thu, 13 Oct 2016 10:44:33 -0500 Subject: [PATCH 0156/1442] sc16is7xx: Drop bogus use of IRQF_ONESHOT commit 04da73803c05dc1150ccc31cbf93e8cd56679c09 upstream. The use of IRQF_ONESHOT when registering an interrupt handler with request_irq() is non-sensical. Not only that, it also prevents the handler from being threaded when it otherwise should be w/ IRQ_FORCED_THREADING is enabled. This causes the following deadlock observed by Sean Nyekjaer on -rt: Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM [..] rt_spin_lock_slowlock from queue_kthread_work queue_kthread_work from sc16is7xx_irq sc16is7xx_irq [sc16is7xx] from handle_irq_event_percpu handle_irq_event_percpu from handle_irq_event handle_irq_event from handle_level_irq handle_level_irq from generic_handle_irq generic_handle_irq from mxc_gpio_irq_handler mxc_gpio_irq_handler from mx3_gpio_irq_handler mx3_gpio_irq_handler from generic_handle_irq generic_handle_irq from __handle_domain_irq __handle_domain_irq from gic_handle_irq gic_handle_irq from __irq_svc __irq_svc from rt_spin_unlock rt_spin_unlock from kthread_worker_fn kthread_worker_fn from kthread kthread from ret_from_fork Fixes: 9e6f4ca3e567 ("sc16is7xx: use kthread_worker for tx_work and irq") Reported-by: Sean Nyekjaer Signed-off-by: Josh Cartwright Cc: linux-rt-users@vger.kernel.org Cc: Jakub Kicinski Cc: linux-serial@vger.kernel.org Cc: Sebastian Andrzej Siewior Signed-off-by: Julia Cartwright Acked-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index fb0672554123..793395451982 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1264,7 +1264,7 @@ static int sc16is7xx_probe(struct device *dev, /* Setup interrupt */ ret = devm_request_irq(dev, irq, sc16is7xx_irq, - IRQF_ONESHOT | flags, dev_name(dev), s); + flags, dev_name(dev), s); if (!ret) return 0; -- GitLab From 8beb252f2be2d1dcd6a6bef199dc62261f597b15 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 27 Nov 2016 19:32:32 +0300 Subject: [PATCH 0157/1442] md/raid5: limit request size according to implementation limits commit e8d7c33232e5fdfa761c3416539bc5b4acd12db5 upstream. Current implementation employ 16bit counter of active stripes in lower bits of bio->bi_phys_segments. If request is big enough to overflow this counter bio will be completed and freed too early. Fortunately this not happens in default configuration because several other limits prevent that: stripe_cache_size * nr_disks effectively limits count of active stripes. And small max_sectors_kb at lower disks prevent that during normal read/write operations. Overflow easily happens in discard if it's enabled by module parameter "devices_handle_discard_safely" and stripe_cache_size is set big enough. This patch limits requests size with 256Mb - 8Kb to prevent overflows. Signed-off-by: Konstantin Khlebnikov Cc: Shaohua Li Cc: Neil Brown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 92ac251e91e6..cce6057b9aca 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6984,6 +6984,15 @@ static int raid5_run(struct mddev *mddev) stripe = (stripe | (stripe-1)) + 1; mddev->queue->limits.discard_alignment = stripe; mddev->queue->limits.discard_granularity = stripe; + + /* + * We use 16-bit counter of active stripes in bi_phys_segments + * (minus one for over-loaded initialization) + */ + blk_queue_max_hw_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS); + blk_queue_max_discard_sectors(mddev->queue, + 0xfffe * STRIPE_SECTORS); + /* * unaligned part of discard request will be ignored, so can't * guarantee discard_zeroes_data -- GitLab From 483eeca2cf389ea717cece86cce8494a050cb3f4 Mon Sep 17 00:00:00 2001 From: Kevin Barnett Date: Thu, 8 Dec 2016 10:29:29 -0600 Subject: [PATCH 0158/1442] scsi: aacraid: remove wildcard for series 9 controllers commit ae2aae2421983f6f68eb7c4692624bc43ea50712 upstream. Controllers with this PCI ID never shipped outside of PMCS/Microsemi. Remove the ID from the aacraid driver. smartpqi is the correct driver for these controllers. [mkp: patch description] Reviewed-by: Scott Teel Signed-off-by: Kevin Barnett Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/linit.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 79871f3519ff..d5b26fa541d3 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -160,7 +160,6 @@ static const struct pci_device_id aac_pci_tbl[] = { { 0x9005, 0x028b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 62 }, /* Adaptec PMC Series 6 (Tupelo) */ { 0x9005, 0x028c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 63 }, /* Adaptec PMC Series 7 (Denali) */ { 0x9005, 0x028d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 64 }, /* Adaptec PMC Series 8 */ - { 0x9005, 0x028f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 65 }, /* Adaptec PMC Series 9 */ { 0,} }; MODULE_DEVICE_TABLE(pci, aac_pci_tbl); @@ -239,7 +238,6 @@ static struct aac_driver_ident aac_drivers[] = { { aac_src_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_SRC }, /* Adaptec PMC Series 6 (Tupelo) */ { aac_srcv_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_SRC }, /* Adaptec PMC Series 7 (Denali) */ { aac_srcv_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_SRC }, /* Adaptec PMC Series 8 */ - { aac_srcv_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_SRC } /* Adaptec PMC Series 9 */ }; /** -- GitLab From 75b1053c2b8c01baedf85f17be8455dadc63a383 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 7 Nov 2016 15:09:58 +1100 Subject: [PATCH 0159/1442] KVM: PPC: Book3S HV: Save/restore XER in checkpointed register state commit 0d808df06a44200f52262b6eb72bcb6042f5a7c5 upstream. When switching from/to a guest that has a transaction in progress, we need to save/restore the checkpointed register state. Although XER is part of the CPU state that gets checkpointed, the code that does this saving and restoring doesn't save/restore XER. This fixes it by saving and restoring the XER. To allow userspace to read/write the checkpointed XER value, we also add a new ONE_REG specifier. The visible effect of this bug is that the guest may see its XER value being corrupted when it uses transactions. Fixes: e4e38121507a ("KVM: PPC: Book3S HV: Add transactional memory support") Fixes: 0a8eccefcb34 ("KVM: PPC: Book3S HV: Add missing code for transaction reclaim on guest exit") Signed-off-by: Paul Mackerras Reviewed-by: Thomas Huth Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv.c | 6 ++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++++ 6 files changed, 14 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6bbceb9a3a19..1f5eab4ef88f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2050,6 +2050,7 @@ registers, find a list below: PPC | KVM_REG_PPC_TM_VSCR | 32 PPC | KVM_REG_PPC_TM_DSCR | 64 PPC | KVM_REG_PPC_TM_TAR | 64 + PPC | KVM_REG_PPC_TM_XER | 64 | | MIPS | KVM_REG_MIPS_R0 | 64 ... diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 28350a294b1e..5e12e19940e2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -546,6 +546,7 @@ struct kvm_vcpu_arch { u64 tfiar; u32 cr_tm; + u64 xer_tm; u64 lr_tm; u64 ctr_tm; u64 amr_tm; diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c93cf35ce379..0fb1326c3ea2 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -596,6 +596,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index caec7bf3b99a..c833d88c423d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -569,6 +569,7 @@ int main(void) DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm)); DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3686471be32b..094deb60c6fe 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1288,6 +1288,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: *val = get_reg_val(id, vcpu->arch.cr_tm); break; + case KVM_REG_PPC_TM_XER: + *val = get_reg_val(id, vcpu->arch.xer_tm); + break; case KVM_REG_PPC_TM_LR: *val = get_reg_val(id, vcpu->arch.lr_tm); break; @@ -1498,6 +1501,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: vcpu->arch.cr_tm = set_reg_val(id, *val); break; + case KVM_REG_PPC_TM_XER: + vcpu->arch.xer_tm = set_reg_val(id, *val); + break; case KVM_REG_PPC_TM_LR: vcpu->arch.lr_tm = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c3c1d1bcfc67..6f81adb112f1 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2600,11 +2600,13 @@ kvmppc_save_tm: mfctr r7 mfspr r8, SPRN_AMR mfspr r10, SPRN_TAR + mfxer r11 std r5, VCPU_LR_TM(r9) stw r6, VCPU_CR_TM(r9) std r7, VCPU_CTR_TM(r9) std r8, VCPU_AMR_TM(r9) std r10, VCPU_TAR_TM(r9) + std r11, VCPU_XER_TM(r9) /* Restore r12 as trap number. */ lwz r12, VCPU_TRAP(r9) @@ -2697,11 +2699,13 @@ kvmppc_restore_tm: ld r7, VCPU_CTR_TM(r4) ld r8, VCPU_AMR_TM(r4) ld r9, VCPU_TAR_TM(r4) + ld r10, VCPU_XER_TM(r4) mtlr r5 mtcr r6 mtctr r7 mtspr SPRN_AMR, r8 mtspr SPRN_TAR, r9 + mtxer r10 /* * Load up PPR and DSCR values but don't put them in the actual SPRs -- GitLab From 94107133ae811e0175803639ae7ab2411e6cdb3c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 16 Nov 2016 16:43:28 +1100 Subject: [PATCH 0160/1442] KVM: PPC: Book3S HV: Don't lose hardware R/C bit updates in H_PROTECT commit f064a0de1579fabded8990bed93971e30deb9ecb upstream. The hashed page table MMU in POWER processors can update the R (reference) and C (change) bits in a HPTE at any time until the HPTE has been invalidated and the TLB invalidation sequence has completed. In kvmppc_h_protect, which implements the H_PROTECT hypercall, we read the HPTE, modify the second doubleword, invalidate the HPTE in memory, do the TLB invalidation sequence, and then write the modified value of the second doubleword back to memory. In doing so we could overwrite an R/C bit update done by hardware between when we read the HPTE and when the TLB invalidation completed. To fix this we re-read the second doubleword after the TLB invalidation and OR in the (possibly) new values of R and C. We can use an OR since hardware only ever sets R and C, never clears them. This race was found by code inspection. In principle this bug could cause occasional guest memory corruption under host memory pressure. Fixes: a8606e20e41a ("KVM: PPC: Handle some PAPR hcalls in the kernel", 2011-06-29) Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 99b4e9d5dd23..5420d060c6f6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -653,6 +653,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, HPTE_V_ABSENT); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + /* Don't lose R/C bit updates done by hardware */ + r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); } } -- GitLab From 3f618a0b872fea38c7d1d1f79eda40f88c6466c2 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 12 Dec 2016 11:01:37 -0800 Subject: [PATCH 0161/1442] kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF) commit ef85b67385436ddc1998f45f1d6a210f935b3388 upstream. When L2 exits to L0 due to "exception or NMI", software exceptions (#BP and #OF) for which L1 has requested an intercept should be handled by L1 rather than L0. Previously, only hardware exceptions were forwarded to L1. Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5382b82462fc..64774f419c72 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1343,10 +1343,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; } -static inline bool is_exception(u32 intr_info) +static inline bool is_nmi(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); + == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); } static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, @@ -5476,7 +5476,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_machine_check(intr_info)) return handle_machine_check(vcpu); - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) + if (is_nmi(intr_info)) return 1; /* already handled by vmx_vcpu_run() */ if (is_no_device(intr_info)) { @@ -8018,7 +8018,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: - if (!is_exception(intr_info)) + if (is_nmi(intr_info)) return false; else if (is_page_fault(intr_info)) return enable_ept; @@ -8611,8 +8611,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && - (exit_intr_info & INTR_INFO_VALID_MASK)) { + if (is_nmi(exit_intr_info)) { kvm_before_handle_nmi(&vmx->vcpu); asm("int $2"); kvm_after_handle_nmi(&vmx->vcpu); -- GitLab From d06485e0fcf58a88daadcbe119a7d433cdaad8e6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Dec 2016 16:08:41 +0100 Subject: [PATCH 0162/1442] fsnotify: Fix possible use-after-free in inode iteration on umount commit 5716863e0f8251d3360d4cbfc0e44e08007075df upstream. fsnotify_unmount_inodes() plays complex tricks to pin next inode in the sb->s_inodes list when iterating over all inodes. Furthermore the code has a bug that if the current inode is the last on i_sb_list that does not have e.g. I_FREEING set, then we leave next_i pointing to inode which may get removed from the i_sb_list once we drop s_inode_list_lock thus resulting in use-after-free issues (usually manifesting as infinite looping in fsnotify_unmount_inodes()). Fix the problem by keeping current inode pinned somewhat longer. Then we can make the code much simpler and standard. Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/notify/inode_mark.c | 45 +++++++++--------------------------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 741077deef3b..a3645249f7ec 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -150,12 +150,10 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, */ void fsnotify_unmount_inodes(struct super_block *sb) { - struct inode *inode, *next_i, *need_iput = NULL; + struct inode *inode, *iput_inode = NULL; spin_lock(&sb->s_inode_list_lock); - list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) { - struct inode *need_iput_tmp; - + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point @@ -178,49 +176,24 @@ void fsnotify_unmount_inodes(struct super_block *sb) continue; } - need_iput_tmp = need_iput; - need_iput = NULL; - - /* In case fsnotify_inode_delete() drops a reference. */ - if (inode != need_iput_tmp) - __iget(inode); - else - need_iput_tmp = NULL; + __iget(inode); spin_unlock(&inode->i_lock); - - /* In case the dropping of a reference would nuke next_i. */ - while (&next_i->i_sb_list != &sb->s_inodes) { - spin_lock(&next_i->i_lock); - if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && - atomic_read(&next_i->i_count)) { - __iget(next_i); - need_iput = next_i; - spin_unlock(&next_i->i_lock); - break; - } - spin_unlock(&next_i->i_lock); - next_i = list_next_entry(next_i, i_sb_list); - } - - /* - * We can safely drop s_inode_list_lock here because either - * we actually hold references on both inode and next_i or - * end of list. Also no new inodes will be added since the - * umount has begun. - */ spin_unlock(&sb->s_inode_list_lock); - if (need_iput_tmp) - iput(need_iput_tmp); + if (iput_inode) + iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); fsnotify_inode_delete(inode); - iput(inode); + iput_inode = inode; spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); } -- GitLab From e80ceb2da52e0aae8e0ae9632c3abbfdd579cf61 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 6 Dec 2016 06:07:15 +0200 Subject: [PATCH 0163/1442] vsock/virtio: fix src/dst cid format commit f83f12d660d11718d3eed9d979ee03e83aa55544 upstream. These fields are 64 bit, using le32_to_cpu and friends on these will not do the right thing. Fix this up. Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/virtio_transport_common.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a53b3a16b4f1..62c056ea403b 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -606,9 +606,9 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) return 0; pkt = virtio_transport_alloc_pkt(&info, 0, - le32_to_cpu(pkt->hdr.dst_cid), + le64_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port), - le32_to_cpu(pkt->hdr.src_cid), + le64_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); if (!pkt) return -ENOMEM; @@ -823,7 +823,7 @@ virtio_transport_send_response(struct vsock_sock *vsk, struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RESPONSE, .type = VIRTIO_VSOCK_TYPE_STREAM, - .remote_cid = le32_to_cpu(pkt->hdr.src_cid), + .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, }; @@ -863,9 +863,9 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) child->sk_state = SS_CONNECTED; vchild = vsock_sk(child); - vsock_addr_init(&vchild->local_addr, le32_to_cpu(pkt->hdr.dst_cid), + vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - vsock_addr_init(&vchild->remote_addr, le32_to_cpu(pkt->hdr.src_cid), + vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); vsock_insert_connected(vchild); @@ -904,9 +904,9 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) struct sock *sk; bool space_available; - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), + vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), + vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, -- GitLab From 21be088c36c604af29810b3fe5a7a8c7bb42b0e9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 8 Dec 2016 12:48:26 -0500 Subject: [PATCH 0164/1442] ftrace/x86_32: Set ftrace_stub to weak to prevent gcc from using short jumps to it commit 847fa1a6d3d00f3bdf68ef5fa4a786f644a0dd67 upstream. With new binutils, gcc may get smart with its optimization and change a jmp from a 5 byte jump to a 2 byte one even though it was jumping to a global function. But that global function existed within a 2 byte radius, and gcc was able to optimize it. Unfortunately, that jump was also being modified when function graph tracing begins. Since ftrace expected that jump to be 5 bytes, but it was only two, it overwrote code after the jump, causing a crash. This was fixed for x86_64 with commit 8329e818f149, with the same subject as this commit, but nothing was done for x86_32. Fixes: d61f82d06672 ("ftrace: use dynamic patching for updating mcount calls") Reported-by: Colin Ian King Tested-by: Colin Ian King Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 21b352a11b49..edba8606b99a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -889,8 +889,8 @@ ftrace_graph_call: jmp ftrace_stub #endif -.globl ftrace_stub -ftrace_stub: +/* This is weak to keep gas from relaxing the jumps */ +WEAK(ftrace_stub) ret END(ftrace_caller) -- GitLab From 3168762e8ad3600392b0b6e230e550271c68fe36 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 29 Nov 2016 23:23:06 -0200 Subject: [PATCH 0165/1442] platform/x86: asus-nb-wmi.c: Add X45U quirk commit e74e259939275a5dd4e0d02845c694f421e249ad upstream. Without this patch, the Asus X45U wireless card can't be turned on (hard-blocked), but after a suspend/resume it just starts working. Following this bug report[1], there are other cases like this one, but this Asus is the only model that I can test. [1] https://ubuntuforums.org/showthread.php?t=2181558 Signed-off-by: Marcos Paulo de Souza Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/asus-nb-wmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 26e4cbc34db8..6032b7085582 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -173,6 +173,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_wapf4, }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X45U", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X45U"), + }, + .driver_data = &quirk_asus_wapf4, + }, { .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. X456UA", -- GitLab From a035dc674dd477e61e5b917c60c30622b6d083f8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 Dec 2016 11:04:53 +0100 Subject: [PATCH 0166/1442] x86/smpboot: Make logical package management more robust commit 9d85eb9119f4eeeb48e87adfcd71f752655700e9 upstream. The logical package management has several issues: - The APIC ids provided by ACPI are not required to be the same as the initial APIC id which can be retrieved by CPUID. The APIC ids provided by ACPI are those which are written by the BIOS into the APIC. The initial id is set by hardware and can not be changed. The hardware provided ids contain the real hardware package information. Especially AMD sets the effective APIC id different from the hardware id as they need to reserve space for the IOAPIC ids starting at id 0. As a consequence those machines trigger the currently active firmware bug printouts in dmesg, These are obviously wrong. - Virtual machines have their own interesting of enumerating APICs and packages which are not reliably covered by the current implementation. The sizing of the mapping array has been tweaked to be generously large to handle systems which provide a wrong core count when HT is disabled so the whole magic which checks for space in the physical hotplug case is not needed anymore. Simplify the whole machinery and do the mapping when the CPU starts and the CPUID derived physical package information is available. This solves the observed problems on AMD machines and works for the virtualization issues as well. Remove the extra call from XEN cpu bringup code as it is not longer required. Fixes: d49597fd3bc7 ("x86/cpu: Deal with broken firmware (VMWare/XEN)") Reported-and-tested-by: Borislav Petkov Tested-by: Boris Ostrovsky Signed-off-by: Thomas Gleixner Cc: Juergen Gross Cc: Peter Zijlstra Cc: M. Vefa Bicakci Cc: xen-devel Cc: Charles (Chas) Williams Cc: Borislav Petkov Cc: Alok Kataria Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1612121102260.3429@nanos Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 15 ----------- arch/x86/kernel/cpu/common.c | 24 ++++++----------- arch/x86/kernel/smpboot.c | 51 ++++++++++++++---------------------- arch/x86/xen/smp.c | 6 ----- 4 files changed, 27 insertions(+), 69 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 88c657b057e2..f2234918e494 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2158,21 +2158,6 @@ int __generic_processor_info(int apicid, int version, bool enabled) } } - /* - * This can happen on physical hotplug. The sanity check at boot time - * is done from native_smp_prepare_cpus() after num_possible_cpus() is - * established. - */ - if (topology_update_package_map(apicid, cpu) < 0) { - int thiscpu = max + disabled_cpus; - - pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", - thiscpu, apicid); - - disabled_cpus++; - return -ENOSPC; - } - /* * Validate version */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc9e980c68ec..c2048b44851c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -979,29 +979,21 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c) } /* - * The physical to logical package id mapping is initialized from the - * acpi/mptables information. Make sure that CPUID actually agrees with - * that. + * Validate that ACPI/mptables have the same information about the + * effective APIC id and update the package map. */ -static void sanitize_package_id(struct cpuinfo_x86 *c) +static void validate_apic_and_package_id(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - unsigned int pkg, apicid, cpu = smp_processor_id(); + unsigned int apicid, cpu = smp_processor_id(); apicid = apic->cpu_present_to_apicid(cpu); - pkg = apicid >> boot_cpu_data.x86_coreid_bits; - if (apicid != c->initial_apicid) { - pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n", + if (apicid != c->apicid) { + pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n", cpu, apicid, c->initial_apicid); - c->initial_apicid = apicid; } - if (pkg != c->phys_proc_id) { - pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", - cpu, pkg, c->phys_proc_id); - c->phys_proc_id = pkg; - } - c->logical_proc_id = topology_phys_to_logical_pkg(pkg); + BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); #else c->logical_proc_id = 0; #endif @@ -1132,7 +1124,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif - sanitize_package_id(c); } /* @@ -1188,6 +1179,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) enable_sep_cpu(); #endif mtrr_ap_init(); + validate_apic_and_package_id(c); } struct msr_range { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 42f5eb7b4f6c..e9bbe02950ad 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -104,7 +104,6 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; -static bool logical_packages_frozen __read_mostly; /* Maximum number of SMT threads on any online core */ int __max_smt_threads __read_mostly; @@ -263,9 +262,14 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -int topology_update_package_map(unsigned int apicid, unsigned int cpu) +/** + * topology_update_package_map - Update the physical to logical package map + * @pkg: The physical package id as retrieved via CPUID + * @cpu: The cpu for which this is updated + */ +int topology_update_package_map(unsigned int pkg, unsigned int cpu) { - unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits; + unsigned int new; /* Called from early boot ? */ if (!physical_package_map) @@ -278,16 +282,17 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu) if (test_and_set_bit(pkg, physical_package_map)) goto found; - if (logical_packages_frozen) { - physical_to_logical_pkg[pkg] = -1; - pr_warn("APIC(%x) Package %u exceeds logical package max\n", - apicid, pkg); + if (logical_packages >= __max_logical_packages) { + pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n", + logical_packages, cpu, __max_logical_packages); return -ENOSPC; } new = logical_packages++; - pr_info("APIC(%x) Converting physical %u to logical package %u\n", - apicid, pkg, new); + if (new != pkg) { + pr_info("CPU %u Converting physical %u to logical package %u\n", + cpu, pkg, new); + } physical_to_logical_pkg[pkg] = new; found: @@ -308,9 +313,9 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) } EXPORT_SYMBOL(topology_phys_to_logical_pkg); -static void __init smp_init_package_map(void) +static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) { - unsigned int ncpus, cpu; + unsigned int ncpus; size_t size; /* @@ -355,27 +360,9 @@ static void __init smp_init_package_map(void) size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); physical_package_map = kzalloc(size, GFP_KERNEL); - for_each_present_cpu(cpu) { - unsigned int apicid = apic->cpu_present_to_apicid(cpu); - - if (apicid == BAD_APICID || !apic->apic_id_valid(apicid)) - continue; - if (!topology_update_package_map(apicid, cpu)) - continue; - pr_warn("CPU %u APICId %x disabled\n", cpu, apicid); - per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID; - set_cpu_possible(cpu, false); - set_cpu_present(cpu, false); - } - - if (logical_packages > __max_logical_packages) { - pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n", - logical_packages, __max_logical_packages); - logical_packages_frozen = true; - __max_logical_packages = logical_packages; - } - pr_info("Max logical packages: %u\n", __max_logical_packages); + + topology_update_package_map(c->phys_proc_id, cpu); } void __init smp_store_boot_cpu_info(void) @@ -385,7 +372,7 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; - smp_init_package_map(); + smp_init_package_map(c, id); } /* diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 9fa27ceeecfd..311acad7dad2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -87,12 +87,6 @@ static void cpu_bringup(void) cpu_data(cpu).x86_max_cores = 1; set_cpu_sibling_map(cpu); - /* - * identify_cpu() may have set logical_pkg_id to -1 due - * to incorrect phys_proc_id. Let's re-comupte it. - */ - topology_update_package_map(apic->cpu_present_to_apicid(cpu), cpu); - xen_setup_cpu_clockevents(); notify_cpu_starting(cpu); -- GitLab From 239b40eb6189e49610e1e614c3670f92d74b9c7b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 8 Dec 2016 20:54:49 -0500 Subject: [PATCH 0167/1442] fgraph: Handle a case where a tracer ignores set_graph_notrace commit 794de08a16cf1fc1bf785dc48f66d36218cf6d88 upstream. Both the wakeup and irqsoff tracers can use the function graph tracer when the display-graph option is set. The problem is that they ignore the notrace file, and record the entry of functions that would be ignored by the function_graph tracer. This causes the trace->depth to be recorded into the ring buffer. The set_graph_notrace uses a trick by adding a large negative number to the trace->depth when a graph function is to be ignored. On trace output, the graph function uses the depth to record a stack of functions. But since the depth is negative, it accesses the array with a negative number and causes an out of bounds access that can cause a kernel oops or corrupt data. Have the print functions handle cases where a tracer still records functions even when they are in set_graph_notrace. Also add warnings if the depth is below zero before accessing the array. Note, the function graph logic will still prevent the return of these functions from being recorded, which means that they will be left hanging without a return. For example: # echo '*spin*' > set_graph_notrace # echo 1 > options/display-graph # echo wakeup > current_tracer # cat trace [...] _raw_spin_lock() { preempt_count_add() { do_raw_spin_lock() { update_rq_clock(); Where it should look like: _raw_spin_lock() { preempt_count_add(); do_raw_spin_lock(); } update_rq_clock(); Cc: Namhyung Kim Fixes: 29ad23b00474 ("ftrace: Add set_graph_notrace filter") Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_functions_graph.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4e480e870474..a17cb1d8415c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -842,6 +842,10 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data = per_cpu_ptr(data->cpu_data, cpu); + /* If a graph tracer ignored set_graph_notrace */ + if (call->depth < -1) + call->depth += FTRACE_NOTRACE_DEPTH; + /* * Comments display at + 1 to depth. Since * this is a leaf function, keep the comments @@ -850,7 +854,8 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data->depth = call->depth - 1; /* No need to keep this function around for this depth */ - if (call->depth < FTRACE_RETFUNC_DEPTH) + if (call->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(call->depth < 0)) cpu_data->enter_funcs[call->depth] = 0; } @@ -880,11 +885,16 @@ print_graph_entry_nested(struct trace_iterator *iter, struct fgraph_cpu_data *cpu_data; int cpu = iter->cpu; + /* If a graph tracer ignored set_graph_notrace */ + if (call->depth < -1) + call->depth += FTRACE_NOTRACE_DEPTH; + cpu_data = per_cpu_ptr(data->cpu_data, cpu); cpu_data->depth = call->depth; /* Save this function pointer to see if the exit matches */ - if (call->depth < FTRACE_RETFUNC_DEPTH) + if (call->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(call->depth < 0)) cpu_data->enter_funcs[call->depth] = call->func; } @@ -1114,7 +1124,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, */ cpu_data->depth = trace->depth - 1; - if (trace->depth < FTRACE_RETFUNC_DEPTH) { + if (trace->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(trace->depth < 0)) { if (cpu_data->enter_funcs[trace->depth] != trace->func) func_match = 0; cpu_data->enter_funcs[trace->depth] = 0; -- GitLab From 382072e09f19ba0fa6a3c8e3373d36bb7247cd44 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 Nov 2016 10:21:17 -0800 Subject: [PATCH 0168/1442] IB/mad: Fix an array index check commit 2fe2f378dd45847d2643638c07a7658822087836 upstream. The array ib_mad_mgmt_class_table.method_table has MAX_MGMT_CLASS (80) elements. Hence compare the array index with that value instead of with IB_MGMT_MAX_METHODS (128). This patch avoids that Coverity reports the following: Overrunning array class->method_table of 80 8-byte elements at element index 127 (byte offset 1016) using index convert_mgmt_class(mad_hdr->mgmt_class) (which evaluates to 127). Fixes: commit b7ab0b19a85f ("IB/mad: Verify mgmt class in received MADs") Signed-off-by: Bart Van Assche Cc: Sean Hefty Reviewed-by: Hal Rosenstock Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 40cbd6bdb73b..2395fe2021c9 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1746,7 +1746,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, if (!class) goto out; if (convert_mgmt_class(mad_hdr->mgmt_class) >= - IB_MGMT_MAX_METHODS) + ARRAY_SIZE(class->method_table)) goto out; method = class->method_table[convert_mgmt_class( mad_hdr->mgmt_class)]; -- GitLab From 4187dfa67fa9bfdfa3ddc14796b42302faf1f357 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 Nov 2016 10:21:41 -0800 Subject: [PATCH 0169/1442] IPoIB: Avoid reading an uninitialized member variable commit 11b642b84e8c43e8597de031678d15c08dd057bc upstream. This patch avoids that Coverity reports the following: Using uninitialized value port_attr.state when calling printk Fixes: commit 94232d9ce817 ("IPoIB: Start multicast join process only on active ports") Signed-off-by: Bart Van Assche Cc: Erez Shitrit Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 1909dd252c94..fddff403d5d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) return; - if (ib_query_port(priv->ca, priv->port, &port_attr) || - port_attr.state != IB_PORT_ACTIVE) { + if (ib_query_port(priv->ca, priv->port, &port_attr)) { + ipoib_dbg(priv, "ib_query_port() failed\n"); + return; + } + if (port_attr.state != IB_PORT_ACTIVE) { ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n", port_attr.state); return; -- GitLab From 5984423bf7ebea12f953e4665aa72ccff83623d1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 Nov 2016 10:22:17 -0800 Subject: [PATCH 0170/1442] IB/multicast: Check ib_find_pkey() return value commit d3a2418ee36a59bc02e9d454723f3175dcf4bfd9 upstream. This patch avoids that Coverity complains about not checking the ib_find_pkey() return value. Fixes: commit 547af76521b3 ("IB/multicast: Report errors on multicast groups if P_key changes") Signed-off-by: Bart Van Assche Cc: Sean Hefty Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/multicast.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index e51b739f6ea3..322cb67b07a9 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -518,8 +518,11 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, process_join_error(group, status); else { int mgids_changed, is_mgid0; - ib_find_pkey(group->port->dev->device, group->port->port_num, - be16_to_cpu(rec->pkey), &pkey_index); + + if (ib_find_pkey(group->port->dev->device, + group->port->port_num, be16_to_cpu(rec->pkey), + &pkey_index)) + pkey_index = MCAST_INVALID_PKEY_INDEX; spin_lock_irq(&group->port->lock); if (group->state == MCAST_BUSY && -- GitLab From 476ed812c42d20ebd26468ce55e34b02ebe1239d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 15 Dec 2016 17:15:07 +0100 Subject: [PATCH 0171/1442] IB/rxe: Fix a memory leak in rxe_qp_cleanup() commit e259934d4df7f99f2a5c2c4f074f6a55bd4b1722 upstream. A socket is associated with every QP by the rxe driver but sock_release() is never called. Add a call to sock_release() in rxe_qp_cleanup(). Fixes: commit 8700e3e7c48A5 ("Add Soft RoCE driver") Signed-off-by: Bart Van Assche Cc: Moni Shoua Cc: Kamal Heib Cc: Amir Vadai Cc: Haggai Eran Reviewed-by: Moni Shoua Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_qp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index c3e60e4bde6e..486d576e55bc 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -855,4 +855,5 @@ void rxe_qp_cleanup(void *arg) free_rd_atomic_resources(qp); kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); } -- GitLab From 9aff8b170fe01cb01cdcce2590dd135925570f1b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 19 Dec 2016 18:00:05 +0100 Subject: [PATCH 0172/1442] IB/cma: Fix a race condition in iboe_addr_get_sgid() commit fba332b079029c2f4f7e84c1c1cd8e3867310c90 upstream. Code that dereferences the struct net_device ip_ptr member must be protected with an in_dev_get() / in_dev_put() pair. Hence insert calls to these functions. Fixes: commit 7b85627b9f02 ("IB/cma: IBoE (RoCE) IP-based GID addressing") Signed-off-by: Bart Van Assche Reviewed-by: Moni Shoua Cc: Or Gerlitz Cc: Roland Dreier Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- include/rdma/ib_addr.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 931a47ba4571..1beab5532035 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -205,10 +205,12 @@ static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (dev) { - ip4 = (struct in_device *)dev->ip_ptr; - if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) + ip4 = in_dev_get(dev); + if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) { ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address, (struct in6_addr *)gid); + in_dev_put(ip4); + } dev_put(dev); } } -- GitLab From eb9afff9513dff7862eba01679fb6587e97fc2b7 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Wed, 30 Nov 2016 22:08:27 -0200 Subject: [PATCH 0173/1442] mn88472: fix chip id check on probe commit 365fe4e0ce218dc5ad10df17b150a366b6015499 upstream. A register used to identify chip during probe was overwritten during firmware download and due to that later probe's for warm chip were failing. Detect chip from the another register, which is located on different register bank 2. Fixes: 94d0eaa41987 ("[media] mn88472: move out of staging to media") Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/mn88472.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/media/dvb-frontends/mn88472.c b/drivers/media/dvb-frontends/mn88472.c index 18fb2df1e2bd..72650116732c 100644 --- a/drivers/media/dvb-frontends/mn88472.c +++ b/drivers/media/dvb-frontends/mn88472.c @@ -488,18 +488,6 @@ static int mn88472_probe(struct i2c_client *client, goto err_kfree; } - /* Check demod answers with correct chip id */ - ret = regmap_read(dev->regmap[0], 0xff, &utmp); - if (ret) - goto err_regmap_0_regmap_exit; - - dev_dbg(&client->dev, "chip id=%02x\n", utmp); - - if (utmp != 0x02) { - ret = -ENODEV; - goto err_regmap_0_regmap_exit; - } - /* * Chip has three I2C addresses for different register banks. Used * addresses are 0x18, 0x1a and 0x1c. We register two dummy clients, @@ -536,6 +524,18 @@ static int mn88472_probe(struct i2c_client *client, } i2c_set_clientdata(dev->client[2], dev); + /* Check demod answers with correct chip id */ + ret = regmap_read(dev->regmap[2], 0xff, &utmp); + if (ret) + goto err_regmap_2_regmap_exit; + + dev_dbg(&client->dev, "chip id=%02x\n", utmp); + + if (utmp != 0x02) { + ret = -ENODEV; + goto err_regmap_2_regmap_exit; + } + /* Sleep because chip is active by default */ ret = regmap_write(dev->regmap[2], 0x05, 0x3e); if (ret) -- GitLab From 9fd64b8302593a6398e4e05571e84309b017fd55 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Wed, 30 Nov 2016 19:36:14 -0200 Subject: [PATCH 0174/1442] mn88473: fix chip id check on probe commit d930b5b5bf122a61952cfebabb1e618682a2631a upstream. A register used to identify chip during probe was overwritten during firmware download and due to that later probe's for warm chip were failing. Detect chip from the another register, which is located on different register bank 2. Fixes: 7908fad99a6c ("[media] mn88473: finalize driver") Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/mn88473.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/media/dvb-frontends/mn88473.c b/drivers/media/dvb-frontends/mn88473.c index 451974a1d7ed..2932bdc8fa94 100644 --- a/drivers/media/dvb-frontends/mn88473.c +++ b/drivers/media/dvb-frontends/mn88473.c @@ -485,18 +485,6 @@ static int mn88473_probe(struct i2c_client *client, goto err_kfree; } - /* Check demod answers with correct chip id */ - ret = regmap_read(dev->regmap[0], 0xff, &uitmp); - if (ret) - goto err_regmap_0_regmap_exit; - - dev_dbg(&client->dev, "chip id=%02x\n", uitmp); - - if (uitmp != 0x03) { - ret = -ENODEV; - goto err_regmap_0_regmap_exit; - } - /* * Chip has three I2C addresses for different register banks. Used * addresses are 0x18, 0x1a and 0x1c. We register two dummy clients, @@ -533,6 +521,18 @@ static int mn88473_probe(struct i2c_client *client, } i2c_set_clientdata(dev->client[2], dev); + /* Check demod answers with correct chip id */ + ret = regmap_read(dev->regmap[2], 0xff, &uitmp); + if (ret) + goto err_regmap_2_regmap_exit; + + dev_dbg(&client->dev, "chip id=%02x\n", uitmp); + + if (uitmp != 0x03) { + ret = -ENODEV; + goto err_regmap_2_regmap_exit; + } + /* Sleep because chip is active by default */ ret = regmap_write(dev->regmap[2], 0x05, 0x3e); if (ret) -- GitLab From 3fbe140e9d360a31ec88ee1126df778b33dde9f2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 16 Sep 2016 03:14:33 -0300 Subject: [PATCH 0175/1442] s5p-mfc: fix failure path of s5p_mfc_alloc_memdev() commit 3467c9a7e7f9209a9ecd8f9db65b04a323a13932 upstream. s5p_mfc_alloc_memdev() function lacks proper releasing of allocated device in case of reserved memory initialization failure. This results in NULL pointer dereference: [ 2.828457] Unable to handle kernel NULL pointer dereference at virtual address 00000001 [ 2.835089] pgd = c0004000 [ 2.837752] [00000001] *pgd=00000000 [ 2.844696] Internal error: Oops: 5 [#1] PREEMPT SMP ARM [ 2.848680] Modules linked in: [ 2.851722] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.8.0-rc6-00002-gafa1b97 #878 [ 2.859357] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 2.865433] task: ef080000 task.stack: ef06c000 [ 2.869952] PC is at strcmp+0x0/0x30 [ 2.873508] LR is at platform_match+0x84/0xac [ 2.877847] pc : [] lr : [] psr: 20000013 [ 2.877847] sp : ef06dea0 ip : 00000000 fp : 00000000 [ 2.889303] r10: 00000000 r9 : c0b34848 r8 : c0b1e968 [ 2.894511] r7 : 00000000 r6 : 00000001 r5 : c086e7fc r4 : eeb8e010 [ 2.901021] r3 : 0000006d r2 : 00000000 r1 : c086e7fc r0 : 00000001 [ 2.907533] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 2.914649] Control: 10c5387d Table: 4000404a DAC: 00000051 [ 2.920378] Process swapper/0 (pid: 1, stack limit = 0xef06c210) [ 2.926367] Stack: (0xef06dea0 to 0xef06e000) [ 2.930711] dea0: eeb8e010 c0c2d91c c03f4a6c c03f4a8c 00000000 c0c2d91c c03f4a6c c03f2fc8 [ 2.938870] dec0: ef003274 ef10c4c0 c0c2d91c ef10cc80 c0c21270 c03f3fa4 c09c1be8 c0c2d91c [ 2.947028] dee0: 00000006 c0c2d91c 00000006 c0b3483c c0c47000 c03f5314 c0c2d908 c0b5fed8 [ 2.955188] df00: 00000006 c010178c 60000013 c0a4ef14 00000000 c06feaa0 ef080000 60000013 [ 2.963347] df20: 00000000 c0c095c8 efffca76 c0816b8c 000000d5 c0134098 c0b34848 c09d6cdc [ 2.971506] df40: c0a4de70 00000000 00000006 00000006 c0c09568 efffca40 c0b5fed8 00000006 [ 2.979665] df60: c0b3483c c0c47000 000000d5 c0b34848 c0b005a4 c0b00d84 00000006 00000006 [ 2.987824] df80: 00000000 c0b005a4 00000000 c06fb4d8 00000000 00000000 00000000 00000000 [ 2.995983] dfa0: 00000000 c06fb4e0 00000000 c01079b8 00000000 00000000 00000000 00000000 [ 3.004142] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 3.012302] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000 ffffffff ffffffff [ 3.020469] [] (strcmp) from [] (platform_match+0x84/0xac) [ 3.027672] [] (platform_match) from [] (__driver_attach+0x20/0xb0) [ 3.035654] [] (__driver_attach) from [] (bus_for_each_dev+0x54/0x88) [ 3.043812] [] (bus_for_each_dev) from [] (bus_add_driver+0xe8/0x1f4) [ 3.051971] [] (bus_add_driver) from [] (driver_register+0x78/0xf4) [ 3.059958] [] (driver_register) from [] (do_one_initcall+0x3c/0x16c) [ 3.068123] [] (do_one_initcall) from [] (kernel_init_freeable+0x120/0x1ec) [ 3.076802] [] (kernel_init_freeable) from [] (kernel_init+0x8/0x118) [ 3.084958] [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) [ 3.092506] Code: 1afffffb e12fff1e e1a03000 eafffff7 (e4d03001) [ 3.098618] ---[ end trace 511bf9d750810709 ]--- [ 3.103207] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b This patch fixes this issue. Fixes: c79667dd93b084fe412bcfe7fbf0ba43f7dec520 ("media: s5p-mfc: replace custom reserved memory handling code with generic one") Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index 0a5b8f5e011e..3436eda58855 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -1082,6 +1082,7 @@ static struct device *s5p_mfc_alloc_memdev(struct device *dev, idx); if (ret == 0) return child; + device_del(child); } put_device(child); -- GitLab From 7f89c1373637395de72f53cb20a170fe24d1f371 Mon Sep 17 00:00:00 2001 From: Andrey Utkin Date: Sat, 22 Oct 2016 13:34:36 -0200 Subject: [PATCH 0176/1442] media: solo6x10: fix lockup by avoiding delayed register write commit 5fc4b067ec082c3127e0156f800769b7e0dce078 upstream. This fixes a lockup at device probing which happens on some solo6010 hardware samples. This is a regression introduced by commit e1ceb25a1569 ("[media] SOLO6x10: remove unneeded register locking and barriers") The observed lockup happens in solo_set_motion_threshold() called from solo_motion_config(). This extra "flushing" is not fundamentally needed for every write, but apparently the code in driver assumes such behaviour at last in some places. Actual fix was proposed by Hans Verkuil. Fixes: e1ceb25a1569 ("[media] SOLO6x10: remove unneeded register locking and barriers") Signed-off-by: Andrey Utkin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/solo6x10/solo6x10.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/pci/solo6x10/solo6x10.h b/drivers/media/pci/solo6x10/solo6x10.h index 5bd498735a66..3f8da5e8c430 100644 --- a/drivers/media/pci/solo6x10/solo6x10.h +++ b/drivers/media/pci/solo6x10/solo6x10.h @@ -284,7 +284,10 @@ static inline u32 solo_reg_read(struct solo_dev *solo_dev, int reg) static inline void solo_reg_write(struct solo_dev *solo_dev, int reg, u32 data) { + u16 val; + writel(data, solo_dev->reg_base + reg); + pci_read_config_word(solo_dev->pdev, PCI_STATUS, &val); } static inline void solo_irq_on(struct solo_dev *dev, u32 mask) -- GitLab From e5d92c0c84f3b821c65210ab20bb4583879a03c9 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 8 Dec 2016 20:22:43 -0200 Subject: [PATCH 0177/1442] v4l: tvp5150: Add missing break in set control handler commit d183e4efcae8d88a2f252e546978658ca6d273cc upstream. A break is missing resulting in the hue control enabling or disabling the decode completely. Fix it. Fixes: c43875f66140 ("[media] tvp5150: replace MEDIA_ENT_F_CONN_TEST by a control") Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 4740da39d698..7268e706e216 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -815,6 +815,7 @@ static int tvp5150_s_ctrl(struct v4l2_ctrl *ctrl) return 0; case V4L2_CID_HUE: tvp5150_write(sd, TVP5150_HUE_CTL, ctrl->val); + break; case V4L2_CID_TEST_PATTERN: decoder->enable = ctrl->val ? false : true; tvp5150_selmux(sd); -- GitLab From afd2a1994ea4e37fb602410b350827d5909714fe Mon Sep 17 00:00:00 2001 From: Jingkui Wang Date: Mon, 12 Dec 2016 13:51:46 -0800 Subject: [PATCH 0178/1442] Input: drv260x - fix input device's parent assignment commit 5a8a6b89c15766446d845671d574a9243b6d8786 upstream. We were assigning I2C bus controller instead of client as parent device. Besides being logically wrong, it messed up with devm handling of input device. As a result we were leaving input device and event node behind after rmmod-ing the driver, which lead to a kernel oops if one were to access the event node later. Let's remove the assignment and rely on devm_input_allocate_device() to set it up properly for us. Signed-off-by: Jingkui Wang Fixes: 7132fe4f5687 ("Input: drv260x - add TI drv260x haptics driver") Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/drv260x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c index 2adfd86c869a..930424e55439 100644 --- a/drivers/input/misc/drv260x.c +++ b/drivers/input/misc/drv260x.c @@ -592,7 +592,6 @@ static int drv260x_probe(struct i2c_client *client, } haptics->input_dev->name = "drv260x:haptics"; - haptics->input_dev->dev.parent = client->dev.parent; haptics->input_dev->close = drv260x_close; input_set_drvdata(haptics->input_dev, haptics); input_set_capability(haptics->input_dev, EV_FF, FF_RUMBLE); -- GitLab From 6195cd1a9628281a10a7132ae0f664a496b2b33c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Dec 2016 11:57:43 +0100 Subject: [PATCH 0179/1442] bad_inode: add missing i_op initializers commit 3f9ca75516a7e581ff803f751a869c1da5ae5fa5 upstream. New inode operations were forgotten to be added to bad_inode. Most of the time the op is checked for NULL before being called but marking the inode bad and the check can race (very unlikely). However in case of ->get_link() only DCACHE_SYMLINK_TYPE is checked before calling the op, so there's no race and will definitely oops when trying to follow links on such a beast. Also remove comments about extinct ops. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/bad_inode.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 8712062275b8..5f685c819298 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -106,6 +106,50 @@ static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer, return -EIO; } +static const char *bad_inode_get_link(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) +{ + return ERR_PTR(-EIO); +} + +static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type) +{ + return ERR_PTR(-EIO); +} + +static int bad_inode_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len) +{ + return -EIO; +} + +static int bad_inode_update_time(struct inode *inode, struct timespec *time, + int flags) +{ + return -EIO; +} + +static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry, + struct file *file, unsigned int open_flag, + umode_t create_mode, int *opened) +{ + return -EIO; +} + +static int bad_inode_tmpfile(struct inode *inode, struct dentry *dentry, + umode_t mode) +{ + return -EIO; +} + +static int bad_inode_set_acl(struct inode *inode, struct posix_acl *acl, + int type) +{ + return -EIO; +} + static const struct inode_operations bad_inode_ops = { .create = bad_inode_create, @@ -118,14 +162,17 @@ static const struct inode_operations bad_inode_ops = .mknod = bad_inode_mknod, .rename = bad_inode_rename2, .readlink = bad_inode_readlink, - /* follow_link must be no-op, otherwise unmounting this inode - won't work */ - /* put_link returns void */ - /* truncate returns void */ .permission = bad_inode_permission, .getattr = bad_inode_getattr, .setattr = bad_inode_setattr, .listxattr = bad_inode_listxattr, + .get_link = bad_inode_get_link, + .get_acl = bad_inode_get_acl, + .fiemap = bad_inode_fiemap, + .update_time = bad_inode_update_time, + .atomic_open = bad_inode_atomic_open, + .tmpfile = bad_inode_tmpfile, + .set_acl = bad_inode_set_acl, }; -- GitLab From 7cc603e9a828e205c393e49711980c9e9e3938ba Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 11 Nov 2016 10:55:41 -0600 Subject: [PATCH 0180/1442] i40iw: Use correct src address in memcpy to rdma stats counters commit 91c42b72f8e8b45961ff05a05009b644e6316ca2 upstream. hw_stats is a pointer to i40_iw_dev_stats struct in i40iw_get_hw_stats(). Use hw_stats and not &hw_stats in the memcpy to copy the i40iw device stats data into rdma_hw_stats counters. Fixes: b40f4757daa1 ("IB/core: Make device counter infrastructure dynamic") Signed-off-by: Shiraz Saleem Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 6329c971c22f..4b892ca2b13a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2501,7 +2501,7 @@ static int i40iw_get_hw_stats(struct ib_device *ibdev, return -ENOSYS; } - memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats)); + memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats)); return stats->num_counters; } -- GitLab From d8c34b0cc0be6c70d9a7a5e5bb8da86af42771f9 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 21 Oct 2016 16:45:38 -0400 Subject: [PATCH 0181/1442] PCI: Check for PME in targeted sleep state commit 6496ebd7edf446fccf8266a1a70ffcb64252593e upstream. One some systems, the firmware does not allow certain PCI devices to be put in deep D-states. This can cause problems for wakeup signalling, if the device does not support PME# in the deepest allowed suspend state. For example, Pierre reports that on his system, ACPI does not permit his xHCI host controller to go into D3 during runtime suspend -- but D3 is the only state in which the controller can generate PME# signals. As a result, the controller goes into runtime suspend but never wakes up, so it doesn't work properly. USB devices plugged into the controller are never detected. If the device relies on PME# for wakeup signals but is not capable of generating PME# in the target state, the PCI core should accurately report that it cannot do wakeup from runtime suspend. This patch modifies the pci_dev_run_wake() routine to add this check. Reported-by: Pierre de Villemereuil Tested-by: Pierre de Villemereuil Signed-off-by: Alan Stern Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: Lukas Wunner Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ba34907538f6..eda6a7cf0e54 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2106,6 +2106,10 @@ bool pci_dev_run_wake(struct pci_dev *dev) if (!dev->pme_support) return false; + /* PME-capable in principle, but not from the intended sleep state */ + if (!pci_pme_capable(dev, pci_target_state(dev))) + return false; + while (bus->parent) { struct pci_dev *bridge = bus->self; -- GitLab From fc6cb9c303e289e0772cd9db593db9acd15cd678 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:09 +0100 Subject: [PATCH 0182/1442] libceph: verify authorize reply on connect commit 5c056fdc5b474329037f2aa18401bd73033e0ce0 upstream. After sending an authorizer (ceph_x_authorize_a + ceph_x_authorize_b), the client gets back a ceph_x_authorize_reply, which it is supposed to verify to ensure the authenticity and protect against replay attacks. The code for doing this is there (ceph_x_verify_authorizer_reply(), ceph_auth_verify_authorizer_reply() + plumbing), but it is never invoked by the the messenger. AFAICT this goes back to 2009, when ceph authentication protocols support was added to the kernel client in 4e7a5dcd1bba ("ceph: negotiate authentication protocol; implement AUTH_NONE protocol"). The second param of ceph_connection_operations::verify_authorizer_reply is unused all the way down. Pass 0 to facilitate backporting, and kill it in the next commit. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index a5502898ea33..2efb335deada 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2027,6 +2027,19 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); + if (con->auth_reply_buf) { + /* + * Any connection that defines ->get_authorizer() + * should also define ->verify_authorizer_reply(). + * See get_connect_authorizer(). + */ + ret = con->ops->verify_authorizer_reply(con, 0); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; + } + } + switch (con->in_reply.tag) { case CEPH_MSGR_TAG_FEATURES: pr_err("%s%lld %s feature set mismatch," -- GitLab From b775b86a5fe8f45500ce027a60ca0f1e1f49a13e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Sep 2016 21:42:32 -0400 Subject: [PATCH 0183/1442] nfs_write_end(): fix handling of short copies commit c0cf3ef5e0f47e385920450b245d22bead93e7ad upstream. What matters when deciding if we should make a page uptodate is not how much we _wanted_ to copy, but how much we actually have copied. As it is, on architectures that do not zero tail on short copy we can leave uninitialized data in page marked uptodate. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/nfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9ea85ae23c32..a1de8ef63e56 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -374,7 +374,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, */ if (!PageUptodate(page)) { unsigned pglen = nfs_page_length(page); - unsigned end = offset + len; + unsigned end = offset + copied; if (pglen == 0) { zero_user_segments(page, 0, offset, -- GitLab From 24b049fa0cd93e9e3e4166de8937b197b101a585 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2016 17:54:32 -0400 Subject: [PATCH 0184/1442] pNFS: On error, do not send LAYOUTGET until the LAYOUTRETURN has completed commit 6604b203fb6394ed1f24c21bfa3c207e5ae8e461 upstream. If there is an I/O error, we should not call LAYOUTGET until the LAYOUTRETURN that reports the error is complete. Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 6 +++++- fs/nfs/pnfs.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 259ef85f435a..d3eb4d7f0cbc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -944,6 +944,7 @@ static void pnfs_clear_layoutcommit(struct inode *inode, void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) { clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); @@ -957,8 +958,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, /* Serialise LAYOUTGET/LAYOUTRETURN */ if (atomic_read(&lo->plh_outstanding) != 0) return false; - if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) return false; + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); pnfs_get_layout_hdr(lo); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { if (stateid != NULL) { @@ -1950,6 +1952,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_lock(&inode->i_lock); pnfs_set_plh_return_info(lo, range.iomode, 0); + /* Block LAYOUTGET */ + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5c295512c967..44cad8afda0e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -96,6 +96,7 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_RETURN, /* layoutreturn in progress */ + NFS_LAYOUT_RETURN_LOCK, /* Serialise layoutreturn */ NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ -- GitLab From f32659df6e154b40391afe7a5db3f0879ad6e70a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Nov 2016 13:10:48 -0500 Subject: [PATCH 0185/1442] pNFS: Don't clear the layout stateid if a layout return is outstanding commit 7b650994ab07434ae58a247dc9ac87d2488ca75c upstream. If we no longer hold any layout segments, we're normally expected to consider the layout stateid to be invalid. However we cannot assume this if we're about to, or in the process of sending a layoutreturn. Fixes: 334a8f37115b ("pNFS: Don't forget the layout stateid if...") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d3eb4d7f0cbc..106dee9dca34 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -411,7 +411,9 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) { + if (list_empty(&lo->plh_segs) && + !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && + !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { if (atomic_read(&lo->plh_outstanding) == 0) set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); -- GitLab From e523ce8c99f9bbd530fda7d46c35409fee88a50f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Nov 2016 14:34:18 -0500 Subject: [PATCH 0186/1442] pNFS: Clear NFS_LAYOUT_RETURN_REQUESTED when invalidating the layout stateid commit ae5a459d5f65c3e83f3e14068dde5fb9c9d81807 upstream. We must ensure that we don't schedule a layoutreturn if the layout stateid has been marked as invalid. Fixes: 2a59a0411671e ("pNFS: Fix pnfs_set_layout_stateid() to clear...") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 106dee9dca34..a106f709c1a7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -299,6 +299,14 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } } +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ + lo->plh_return_iomode = 0; + lo->plh_return_seq = 0; + clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} + /* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * @@ -317,6 +325,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, }; set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + pnfs_clear_layoutreturn_info(lo); return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); } @@ -818,14 +827,6 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_destroy_layouts_byclid(clp, false); } -static void -pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) -{ - lo->plh_return_iomode = 0; - lo->plh_return_seq = 0; - clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -} - /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, -- GitLab From ba15defe1807ff546ab4d14beb2f9602789c0170 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Nov 2016 15:12:39 -0500 Subject: [PATCH 0187/1442] pNFS: Fix a deadlock between read resends and layoutreturn commit 54e4a0dfa25d9365c4e80a639e80d9213eb6edbe upstream. We must not call nfs_pageio_init_read() on a new nfs_pageio_descriptor while holding a reference to a layout segment, as that can deadlock pnfs_update_layout(). Fixes: d67ae825a59d6 ("pnfs/flexfiles: Add the FlexFile Layout Driver") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/flexfilelayout/flexfilelayout.c | 4 ++++ fs/nfs/pnfs.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 98ace127bf86..a5c38889e7ae 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -28,6 +28,9 @@ static struct group_info *ff_zero_group; +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, + struct nfs_pgio_header *hdr); + static struct pnfs_layout_hdr * ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) { @@ -1293,6 +1296,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->pgio_mirror_idx + 1, &hdr->pgio_mirror_idx)) goto out_eagain; + ff_layout_read_record_layoutstats_done(task, hdr); pnfs_read_resend_pnfs(hdr); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a106f709c1a7..31b107e196fd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2293,6 +2293,10 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) struct nfs_pageio_descriptor pgio; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + /* Prevent deadlocks with layoutreturn! */ + pnfs_put_lseg(hdr->lseg); + hdr->lseg = NULL; + nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); -- GitLab From 369b330c9d821196af71cc37f362f37b59e4ba59 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Dec 2016 15:10:11 +1100 Subject: [PATCH 0188/1442] SUNRPC: fix refcounting problems with auth_gss messages. commit 1cded9d2974fe4fe339fc0ccd6638b80d465ab2c upstream. There are two problems with refcounting of auth_gss messages. First, the reference on the pipe->pipe list (taken by a call to rpc_queue_upcall()) is not counted. It seems to be assumed that a message in pipe->pipe will always also be in pipe->in_downcall, where it is correctly reference counted. However there is no guaranty of this. I have a report of a NULL dereferences in rpc_pipe_read() which suggests a msg that has been freed is still on the pipe->pipe list. One way I imagine this might happen is: - message is queued for uid=U and auth->service=S1 - rpc.gssd reads this message and starts processing. This removes the message from pipe->pipe - message is queued for uid=U and auth->service=S2 - rpc.gssd replies to the first message. gss_pipe_downcall() calls __gss_find_upcall(pipe, U, NULL) and it finds the *second* message, as new messages are placed at the head of ->in_downcall, and the service type is not checked. - This second message is removed from ->in_downcall and freed by gss_release_msg() (even though it is still on pipe->pipe) - rpc.gssd tries to read another message, and dereferences a pointer to this message that has just been freed. I fix this by incrementing the reference count before calling rpc_queue_upcall(), and decrementing it if that fails, or normally in gss_pipe_destroy_msg(). It seems strange that the reply doesn't target the message more precisely, but I don't know all the details. In any case, I think the reference counting irregularity became a measureable bug when the extra arg was added to __gss_find_upcall(), hence the Fixes: line below. The second problem is that if rpc_queue_upcall() fails, the new message is not freed. gss_alloc_msg() set the ->count to 1, gss_add_msg() increments this to 2, gss_unhash_msg() decrements to 1, then the pointer is discarded so the memory never gets freed. Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1011250 Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/auth_gss.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3dfd769dc5b5..16cea00c959b 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -541,9 +541,13 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); + int res; + atomic_inc(&gss_msg->count); + res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); + atomic_dec(&gss_msg->count); + gss_release_msg(gss_new); gss_msg = ERR_PTR(res); } } else @@ -836,6 +840,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) warn_gssd(); gss_release_msg(gss_msg); } + gss_release_msg(gss_msg); } static void gss_pipe_dentry_destroy(struct dentry *dir, -- GitLab From 4b23132d6cf56607626760458ff18d3a30a88bd9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 24 Nov 2016 00:02:07 +1100 Subject: [PATCH 0189/1442] powerpc/64e: Convert cmpi to cmpwi in head_64.S commit f87f253bac3ce4a4eb2a60a1ae604d74e65f9042 upstream. From 80f23935cadb ("powerpc: Convert cmp to cmpd in idle enter sequence"): PowerPC's "cmp" instruction has four operands. Normally people write "cmpw" or "cmpd" for the second cmp operand 0 or 1. But, frequently people forget, and write "cmp" with just three operands. With older binutils this is silently accepted as if this was "cmpw", while often "cmpd" is wanted. With newer binutils GAS will complain about this for 64-bit code. For 32-bit code it still silently assumes "cmpw" is what is meant. In this case, cmpwi is called for, so this is just a build fix for new toolchains. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 04c546e20cc0..1f7f908f186e 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -214,9 +214,9 @@ booting_thread_hwid: */ _GLOBAL(book3e_start_thread) LOAD_REG_IMMEDIATE(r5, MSR_KERNEL) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 11f /* If the thread id is invalid, just exit. */ b 13f @@ -241,9 +241,9 @@ _GLOBAL(book3e_start_thread) * r3 = the thread physical id */ _GLOBAL(book3e_stop_thread) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 10f /* If the thread id is invalid, just exit. */ b 13f -- GitLab From efcb3d9442fffe6dd4d12a8c70fe9e6a5423c4df Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 29 Nov 2016 10:47:32 -0800 Subject: [PATCH 0190/1442] powerpc/ps3: Fix system hang with GCC 5 builds commit 6dff5b67054e17c91bd630bcdda17cfca5aa4215 upstream. GCC 5 generates different code for this bootwrapper null check that causes the PS3 to hang very early in its bootup. This check is of limited value, so just get rid of it. Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/ps3-head.S | 5 ----- arch/powerpc/boot/ps3.c | 8 +------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S index b6fcbaf5027b..3dc44b05fb97 100644 --- a/arch/powerpc/boot/ps3-head.S +++ b/arch/powerpc/boot/ps3-head.S @@ -57,11 +57,6 @@ __system_reset_overlay: bctr 1: - /* Save the value at addr zero for a null pointer write check later. */ - - li r4, 0 - lwz r3, 0(r4) - /* Primary delays then goes to _zimage_start in wrapper. */ or 31, 31, 31 /* db16cyc */ diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index 4ec2d86d3c50..a05558a7e51a 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -119,13 +119,12 @@ void ps3_copy_vectors(void) flush_cache((void *)0x100, 512); } -void platform_init(unsigned long null_check) +void platform_init(void) { const u32 heapsize = 0x1000000 - (u32)_end; /* 16MiB */ void *chosen; unsigned long ft_addr; u64 rm_size; - unsigned long val; console_ops.write = ps3_console_write; platform_ops.exit = ps3_exit; @@ -153,11 +152,6 @@ void platform_init(unsigned long null_check) printf(" flat tree at 0x%lx\n\r", ft_addr); - val = *(unsigned long *)0; - - if (val != null_check) - printf("null check failed: %lx != %lx\n\r", val, null_check); - ((kernel_entry_t)0)(ft_addr, 0, NULL); ps3_exit(); -- GitLab From d5cf5ae25c95f915b2c7a2dfc3e3767d72bf2b85 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 28 Nov 2016 12:42:26 +1100 Subject: [PATCH 0191/1442] powerpc/boot: Request no dynamic linker for boot wrapper commit ff45000fcb56b5b0f1a14a865d3541746d838a0a upstream. The boot wrapper performs its own relocations and does not require PT_INTERP segment. However currently we don't tell the linker that. Prior to binutils 2.28 that works OK. But since binutils commit 1a9ccd70f9a7 ("Fix the linker so that it will not silently generate ELF binaries with invalid program headers. Fix readelf to report such invalid binaries.") binutils tries to create a program header segment due to PT_INTERP, and the link fails because there is no space for it: ld: arch/powerpc/boot/zImage.pseries: Not enough room for program headers, try linking with -N ld: final link failed: Bad value So tell the linker not to do that, by passing --no-dynamic-linker. Reported-by: Anton Blanchard Signed-off-by: Nicholas Piggin [mpe: Drop dependency on ld-version.sh and massage change log] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/wrapper | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 404b3aabdb4d..76fe3ccfd381 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -181,6 +181,28 @@ case "$elfformat" in elf32-powerpc) format=elf32ppc ;; esac +ld_version() +{ + # Poached from scripts/ld-version.sh, but we don't want to call that because + # this script (wrapper) is distributed separately from the kernel source. + # Extract linker version number from stdin and turn into single number. + awk '{ + gsub(".*\\)", ""); + gsub(".*version ", ""); + gsub("-.*", ""); + split($1,a, "."); + print a[1]*100000000 + a[2]*1000000 + a[3]*10000; + exit + }' +} + +# Do not include PT_INTERP segment when linking pie. Non-pie linking +# just ignores this option. +LD_VERSION=$(${CROSS}ld --version | ld_version) +LD_NO_DL_MIN_VERSION=$(echo 2.26 | ld_version) +if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then + nodl="--no-dynamic-linker" +fi platformo=$object/"$platform".o lds=$object/zImage.lds @@ -446,7 +468,7 @@ if [ "$platform" != "miboot" ]; then text_start="-Ttext $link_address" fi #link everything - ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \ + ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \ $platformo $tmp $object/wrapper.a rm $tmp fi -- GitLab From 6838ac125324b91eeab5ccaf99e4a9bd1f8a7895 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 28 Oct 2016 14:15:02 -0700 Subject: [PATCH 0192/1442] of, numa: Return NUMA_NO_NODE from disable of_node_to_nid() if nid not possible. commit b6cc9474e2dd9f0c19b694b40961d81117f1e918 upstream. On arm64 NUMA kernels we can pass "numa=off" on the command line to disable NUMA. A side effect of this is that kmalloc_node() calls to non-zero nodes will crash the system with an OOPS: [ 0.000000] ITS@0x0000901000020000: allocated 2097152 Devices @10002000000 (flat, esz 8, psz 64K, shr 1) [ 0.000000] Unable to handle kernel NULL pointer dereference at virtual address 00001680 [ 0.000000] pgd = fffffc0009470000 [ 0.000000] [00001680] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff90003, *pte=0000000000000000 [ 0.000000] Internal error: Oops: 96000006 [#1] SMP . . . [ 0.000000] [] __alloc_pages_nodemask+0xa4/0xe68 [ 0.000000] [] new_slab+0xd0/0x564 [ 0.000000] [] ___slab_alloc+0x2e4/0x514 [ 0.000000] [] __slab_alloc+0x48/0x58 [ 0.000000] [] __kmalloc_node+0xd0/0x2dc [ 0.000000] [] __irq_domain_add+0x7c/0x164 [ 0.000000] [] its_probe+0x784/0x81c [ 0.000000] [] its_init+0x48/0x1b0 [ 0.000000] [] gic_init_bases+0x228/0x360 [ 0.000000] [] gic_of_init+0x148/0x1cc [ 0.000000] [] of_irq_init+0x184/0x298 [ 0.000000] [] irqchip_init+0x14/0x38 [ 0.000000] [] init_IRQ+0xc/0x30 [ 0.000000] [] start_kernel+0x240/0x3b8 [ 0.000000] [] __primary_switched+0x30/0x6c [ 0.000000] Code: 912ec2a0 b9403809 0a0902fb 37b007db (f9400300) . . . This is caused by code like this in kernel/irq/irqdomain.c domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), GFP_KERNEL, of_node_to_nid(of_node)); When NUMA is disabled, the concept of a node is really undefined, so of_node_to_nid() should unconditionally return NUMA_NO_NODE. Fix by returning NUMA_NO_NODE when the nid is not in the set of possible nodes. Reported-by: Gilbert Netzer Signed-off-by: David Daney Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/of_numa.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index f63d4b0deff0..a53982a330ea 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -176,7 +176,12 @@ int of_node_to_nid(struct device_node *device) np->name); of_node_put(np); - if (!r) + /* + * If numa=off passed on command line, or with a defective + * device tree, the nid may not be in the set of possible + * nodes. Check for this case and return NUMA_NO_NODE. + */ + if (!r && nid < MAX_NUMNODES && node_possible(nid)) return nid; return NUMA_NO_NODE; -- GitLab From 630a2ef354bb569e84a874ec59f45f82c6c5ff04 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 10 Dec 2016 08:12:05 -0800 Subject: [PATCH 0193/1442] libnvdimm, pfn: fix align attribute commit af7d9f0c57941b465043681cb5c3410f7f3f1a41 upstream. Fix the format specifier so that the attribute can be parsed correctly. Currently it returns decimal 1000 for a 4096-byte alignment. Reported-by: Dave Jiang Fixes: 315c562536c4 ("libnvdimm, pfn: add 'align' attribute, default to HPAGE_SIZE") Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/pfn_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index cea8350fbc7e..a2ac9e641aa9 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -108,7 +108,7 @@ static ssize_t align_show(struct device *dev, { struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); - return sprintf(buf, "%lx\n", nd_pfn->align); + return sprintf(buf, "%ld\n", nd_pfn->align); } static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf) -- GitLab From 173fd37a3d2135fcf17a3240a71ca9c79ae151ab Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 21 Nov 2016 16:35:30 -0800 Subject: [PATCH 0194/1442] target/user: Fix use-after-free of tcmu_cmds if they are expired commit d0905ca757bc40bd1ebc261a448a521b064777d7 upstream. Don't free the cmd in tcmu_check_expired_cmd, it's still referenced by an entry in our cmd_id->cmd idr. If userspace ever resumes processing, tcmu_handle_completions() will use the now-invalid cmd pointer. Instead, don't free cmd. It will be freed by tcmu_handle_completion() if userspace ever recovers, or tcmu_free_device if not. Reported-by: Bryant G Ly Tested-by: Bryant G Ly Signed-off-by: Andy Grover Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_user.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 47562509b489..70c143a5c38c 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -685,8 +685,6 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION); cmd->se_cmd = NULL; - kmem_cache_free(tcmu_cmd_cache, cmd); - return 0; } -- GitLab From d024532a97db32db4b0fc4cd27c7a9ac15b84e5b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 24 Nov 2016 22:10:23 +0000 Subject: [PATCH 0195/1442] kconfig/nconf: Fix hang when editing symbol with a long prompt commit 79e51b5c2deea542b3bb8c66e0d502230b017dde upstream. Currently it is impossible to edit the value of a config symbol with a prompt longer than (terminal width - 2) characters. dialog_inputbox() calculates a negative x-offset for the input window and newwin() fails as this is invalid. It also doesn't check for this failure, so it busy-loops calling wgetch(NULL) which immediately returns -1. The additions in the offset calculations also don't match the intended size of the window. Limit the window size and calculate the offset similarly to show_scroll_win(). Fixes: 692d97c380c6 ("kconfig: new configuration interface (nconfig)") Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- scripts/kconfig/nconf.gui.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 8275f0e55106..4b2f44c20caf 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -364,12 +364,14 @@ int dialog_inputbox(WINDOW *main_window, WINDOW *prompt_win; WINDOW *form_win; PANEL *panel; - int i, x, y; + int i, x, y, lines, columns, win_lines, win_cols; int res = -1; int cursor_position = strlen(init); int cursor_form_win; char *result = *resultp; + getmaxyx(stdscr, lines, columns); + if (strlen(init)+1 > *result_len) { *result_len = strlen(init)+1; *resultp = result = realloc(result, *result_len); @@ -386,14 +388,19 @@ int dialog_inputbox(WINDOW *main_window, if (title) prompt_width = max(prompt_width, strlen(title)); + win_lines = min(prompt_lines+6, lines-2); + win_cols = min(prompt_width+7, columns-2); + prompt_lines = max(win_lines-6, 0); + prompt_width = max(win_cols-7, 0); + /* place dialog in middle of screen */ - y = (getmaxy(stdscr)-(prompt_lines+4))/2; - x = (getmaxx(stdscr)-(prompt_width+4))/2; + y = (lines-win_lines)/2; + x = (columns-win_cols)/2; strncpy(result, init, *result_len); /* create the windows */ - win = newwin(prompt_lines+6, prompt_width+7, y, x); + win = newwin(win_lines, win_cols, y, x); prompt_win = derwin(win, prompt_lines+1, prompt_width, 2, 2); form_win = derwin(win, 1, prompt_width, prompt_lines+3, 2); keypad(form_win, TRUE); -- GitLab From 3f3a6bbe6f9f5e895d8945494173594ee51632da Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Dec 2016 13:42:06 -0500 Subject: [PATCH 0196/1442] sg_write()/bsg_write() is not fit to be called under KERNEL_DS commit 128394eff343fc6d2f32172f03e24829539c5835 upstream. Both damn things interpret userland pointers embedded into the payload; worse, they are actually traversing those. Leaving aside the bad API design, this is very much _not_ safe to call with KERNEL_DS. Bail out early if that happens. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- block/bsg.c | 3 +++ drivers/scsi/sg.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/block/bsg.c b/block/bsg.c index d214e929ce18..b9a53615bdef 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -655,6 +655,9 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) dprintk("%s: write %Zd bytes\n", bd->name, count); + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EINVAL; + bsg_set_block(bd, file); bytes_written = 0; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 070332eb41f3..dbe5b4b95df0 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -581,6 +581,9 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EINVAL; + if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, -- GitLab From 2037b1b46cdb2cb073b835473bd6c168929edbbd Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 21 Dec 2016 11:28:49 +0100 Subject: [PATCH 0197/1442] net: mvpp2: fix dma unmapping of TX buffers for fragments commit 8354491c9d5b06709384cea91d13019bf5e61449 upstream. Since commit 71ce391dfb784 ("net: mvpp2: enable proper per-CPU TX buffers unmapping"), we are not correctly DMA unmapping TX buffers for fragments. Indeed, the mvpp2_txq_inc_put() function only stores in the txq_cpu->tx_buffs[] array the physical address of the buffer to be DMA-unmapped when skb != NULL. In addition, when DMA-unmapping, we use skb_headlen(skb) to get the size to be unmapped. Both of this works fine for TX descriptors that are associated directly to a SKB, but not the ones that are used for fragments, with a NULL pointer as skb: - We have a NULL physical address when calling DMA unmap - skb_headlen(skb) crashes because skb is NULL This causes random crashes when fragments are used. To solve this problem, we need to: - Store the physical address of the buffer to be unmapped unconditionally, regardless of whether it is tied to a SKB or not. - Store the length of the buffer to be unmapped, which requires a new field. Instead of adding a third array to store the length of the buffer to be unmapped, and as suggested by David Miller, this commit refactors the tx_buffs[] and tx_skb[] arrays of 'struct mvpp2_txq_pcpu' into a separate structure 'mvpp2_txq_pcpu_buf', to which a 'size' field is added. Therefore, instead of having three arrays to allocate/free, we have a single one, which also improve data locality, reducing the impact on the CPU cache. Fixes: 71ce391dfb784 ("net: mvpp2: enable proper per-CPU TX buffers unmapping") Reported-by: Raphael G Cc: Raphael G Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 59 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 1026c452e39d..930c8165f2a8 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -770,6 +770,17 @@ struct mvpp2_rx_desc { u32 reserved8; }; +struct mvpp2_txq_pcpu_buf { + /* Transmitted SKB */ + struct sk_buff *skb; + + /* Physical address of transmitted buffer */ + dma_addr_t phys; + + /* Size transmitted */ + size_t size; +}; + /* Per-CPU Tx queue control */ struct mvpp2_txq_pcpu { int cpu; @@ -785,11 +796,8 @@ struct mvpp2_txq_pcpu { /* Number of Tx DMA descriptors reserved for each CPU */ int reserved_num; - /* Array of transmitted skb */ - struct sk_buff **tx_skb; - - /* Array of transmitted buffers' physical addresses */ - dma_addr_t *tx_buffs; + /* Infos about transmitted buffers */ + struct mvpp2_txq_pcpu_buf *buffs; /* Index of last TX DMA descriptor that was inserted */ int txq_put_index; @@ -979,10 +987,11 @@ static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu, struct sk_buff *skb, struct mvpp2_tx_desc *tx_desc) { - txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb; - if (skb) - txq_pcpu->tx_buffs[txq_pcpu->txq_put_index] = - tx_desc->buf_phys_addr; + struct mvpp2_txq_pcpu_buf *tx_buf = + txq_pcpu->buffs + txq_pcpu->txq_put_index; + tx_buf->skb = skb; + tx_buf->size = tx_desc->data_size; + tx_buf->phys = tx_desc->buf_phys_addr; txq_pcpu->txq_put_index++; if (txq_pcpu->txq_put_index == txq_pcpu->size) txq_pcpu->txq_put_index = 0; @@ -4401,17 +4410,16 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, int i; for (i = 0; i < num; i++) { - dma_addr_t buf_phys_addr = - txq_pcpu->tx_buffs[txq_pcpu->txq_get_index]; - struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index]; + struct mvpp2_txq_pcpu_buf *tx_buf = + txq_pcpu->buffs + txq_pcpu->txq_get_index; mvpp2_txq_inc_get(txq_pcpu); - dma_unmap_single(port->dev->dev.parent, buf_phys_addr, - skb_headlen(skb), DMA_TO_DEVICE); - if (!skb) + dma_unmap_single(port->dev->dev.parent, tx_buf->phys, + tx_buf->size, DMA_TO_DEVICE); + if (!tx_buf->skb) continue; - dev_kfree_skb_any(skb); + dev_kfree_skb_any(tx_buf->skb); } } @@ -4651,15 +4659,10 @@ static int mvpp2_txq_init(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); txq_pcpu->size = txq->size; - txq_pcpu->tx_skb = kmalloc(txq_pcpu->size * - sizeof(*txq_pcpu->tx_skb), - GFP_KERNEL); - if (!txq_pcpu->tx_skb) - goto error; - - txq_pcpu->tx_buffs = kmalloc(txq_pcpu->size * - sizeof(dma_addr_t), GFP_KERNEL); - if (!txq_pcpu->tx_buffs) + txq_pcpu->buffs = kmalloc(txq_pcpu->size * + sizeof(struct mvpp2_txq_pcpu_buf), + GFP_KERNEL); + if (!txq_pcpu->buffs) goto error; txq_pcpu->count = 0; @@ -4673,8 +4676,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, error: for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->tx_skb); - kfree(txq_pcpu->tx_buffs); + kfree(txq_pcpu->buffs); } dma_free_coherent(port->dev->dev.parent, @@ -4693,8 +4695,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->tx_skb); - kfree(txq_pcpu->tx_buffs); + kfree(txq_pcpu->buffs); } if (txq->descs) -- GitLab From 6cb4179f97cf666639f2f11b40d45766bc1b4d85 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 1 Dec 2016 21:29:09 +0100 Subject: [PATCH 0198/1442] drm/i915/dsi: Fix chv_exec_gpio disabling the GPIOs it is setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 22ca0d4991169b76e753d767a45f1105c356bbb8 upstream. Set the CHV_GPIO_GPIOEN bit when updating GPIOs from chv_exec_gpio. Fixes: a0a6d4ffd2ad ("drm/i915/dsi: add support for gpio elements on CHV") Cc: Jani Nikula Cc: Ville Syrjälä Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161201202925.12220-3-hdegoede@redhat.com Signed-off-by: Ville Syrjälä (cherry picked from commit b2b45fcd921e864a5e9bbc7aa55dee96d5e11c06) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index cd154ce6b6c1..34601574fc6e 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -296,7 +296,8 @@ static void chv_exec_gpio(struct drm_i915_private *dev_priv, mutex_lock(&dev_priv->sb_lock); vlv_iosf_sb_write(dev_priv, port, cfg1, 0); vlv_iosf_sb_write(dev_priv, port, cfg0, - CHV_GPIO_GPIOCFG_GPO | CHV_GPIO_GPIOTXSTATE(value)); + CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO | + CHV_GPIO_GPIOTXSTATE(value)); mutex_unlock(&dev_priv->sb_lock); } -- GitLab From d816da6f57661157b3e056da426141f98d3bddde Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Dec 2016 15:29:04 +0100 Subject: [PATCH 0199/1442] drm/i915/dsi: Do not clear DPOUNIT_CLOCK_GATE_DISABLE from vlv_init_display_clock_gating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bb98e72adaf9d19719aba35f802d4836f5d5176c upstream. On my Cherrytrail CUBE iwork8 Air tablet PIPE-A would get stuck on loading i915 at boot 1 out of every 3 boots, resulting in a non functional LCD. Once the i915 driver has successfully loaded, the panel can be disabled / enabled without hitting this issue. The getting stuck is caused by vlv_init_display_clock_gating() clearing the DPOUNIT_CLOCK_GATE_DISABLE bit in DSPCLK_GATE_D when called from chv_pipe_power_well_ops.enable() on driver load, while a pipe is enabled driving the DSI LCD by the BIOS. Clearing this bit while DSI is in use is a known issue and intel_dsi_pre_enable() / intel_dsi_post_disable() already set / clear it as appropriate. This commit modifies vlv_init_display_clock_gating() to leave the DPOUNIT_CLOCK_GATE_DISABLE bit alone fixing the pipe getting stuck. Changes in v2: -Replace PIPE-A with "a pipe" or "the pipe" in the commit msg and comment Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97330 Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161202142904.25613-1-hdegoede@redhat.com Signed-off-by: Ville Syrjälä (cherry picked from commit 721d484563e1a51ada760089c490cbc47e909756) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_runtime_pm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index a38c2fefe85a..23ed3f5972fa 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1065,7 +1065,18 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) { - I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); + u32 val; + + /* + * On driver load, a pipe may be active and driving a DSI display. + * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck + * (and never recovering) in this case. intel_dsi_post_disable() will + * clear it when we turn off the display. + */ + val = I915_READ(DSPCLK_GATE_D); + val &= DPOUNIT_CLOCK_GATE_DISABLE; + val |= VRHUNIT_CLOCK_GATE_DISABLE; + I915_WRITE(DSPCLK_GATE_D, val); /* * Disable trickle feed and enable pnd deadline calculation -- GitLab From 159e2ea9ed4996e3398cb9174921a4543c785ed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 14 Nov 2016 18:35:09 +0200 Subject: [PATCH 0200/1442] drm/i915: Fix cdclk vs. dev_cdclk mess when not recomputing things MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 14676ec6b1a6f2f7fa0bafd98ab42ce77be7a7d4 upstream. When we end up not recomputing the cdclk, we need to populate intel_state->cdclk with the "atomic_cdclk_freq" instead of the current cdclk_freq. When no pipes are active, the actual cdclk_freq may be lower than what the configuration of the planes and pipes would require from the point of view of the software state. This fixes bogus WARNS from skl_max_scale() which is trying to check the plane software state against the cdclk frequency. So any time it got called during DPMS off for instance, we might have tripped the warn if the current mode would have required a higher than minimum cdclk. v2: Drop the dev_cdclk stuff (Maarten) Cc: Maarten Lankhorst Cc: Mika Kahola Cc: bruno.pagani@ens-lyon.org Cc: Daniel J Blueman Cc: Paul Bolle Cc: Joseph Yasi Tested-by: Paul Bolle Tested-by: Joseph Yasi (v1) Fixes: 1a617b77658e ("drm/i915: Keep track of the cdclk as if all crtc's were active.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98214 Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1479141311-11904-2-git-send-email-ville.syrjala@linux.intel.com (cherry picked from commit e0ca7a6be38ce603d26df5707c22e53870a623e0) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3cb70d73239b..592694c9de7d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13970,8 +13970,9 @@ static int intel_modeset_checks(struct drm_atomic_state *state) DRM_DEBUG_KMS("New cdclk calculated to be atomic %u, actual %u\n", intel_state->cdclk, intel_state->dev_cdclk); - } else + } else { to_intel_atomic_state(state)->cdclk = dev_priv->atomic_cdclk_freq; + } intel_modeset_clear_plls(state); @@ -14072,8 +14073,9 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; - } else - intel_state->cdclk = dev_priv->cdclk_freq; + } else { + intel_state->cdclk = dev_priv->atomic_cdclk_freq; + } ret = drm_atomic_helper_check_planes(dev, state); if (ret) -- GitLab From 6396411054430a6473233735b6e0208759b68291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Nov 2016 16:13:57 +0200 Subject: [PATCH 0201/1442] drm/i915: Initialize dev_priv->atomic_cdclk_freq at init time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1f3dc3e334c1192ebe2939ea17ba12f4776f90c3 upstream. Looks like we're only initializing dev_priv->atomic_cdclk_freq at resume and commit times, not at init time. Let's do that as well. We're now hitting the 'WARN_ON(intel_state->cdclk == 0)' in hsw_compute_linetime_wm() on account of populating intel_state->cdclk from dev_priv->atomic_cdclk_freq. Previously we were mispopulating intel_state->cdclk with dev_priv->cdclk_freq which always had a proper value at init time and hence the WARN_ON() didn't trigger. Cc: Matthew Auld Reported-by: Matthew Auld Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98902 Fixes: 14676ec6b1a6 ("drm/i915: Fix cdclk vs. dev_cdclk mess when not recomputing things") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1480428837-4207-1-git-send-email-ville.syrjala@linux.intel.com Tested-by: Matthew Auld Reviewed-by: Matthew Auld (cherry picked from commit 6a259b1f8a9e99b1ed114f8bf8b0cfccee130e54) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 592694c9de7d..71441c329603 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16443,6 +16443,7 @@ void intel_modeset_init(struct drm_device *dev) intel_update_czclk(dev_priv); intel_update_cdclk(dev); + dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; intel_shared_dpll_init(dev); -- GitLab From 18a00ac6d7f106901356e1fa9c082730a2318ed5 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 14 Dec 2016 12:55:37 -0200 Subject: [PATCH 0202/1442] drm/i915: skip the first 4k of stolen memory on everything >= gen8 commit 6ba0566cf2afcdb17bff882e3a95cbbcb22c4a83 upstream. BSpec got updated and this workaround is now listed as standard required programming for all subsequent projects. This is confirmed to fix Skylake screen flickering issues (probably caused by the fact that we initialized a ring in the first page of stolen, but I didn't 100% confirm this theory). v2: this is the patch that fixes the screen flickering, document it. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94605 Tested-by: Dominik Klementowski Signed-off-by: Paulo Zanoni Acked-by: Chris Wilson Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1481727338-9901-1-git-send-email-paulo.r.zanoni@intel.com (cherry picked from commit d43537610470d8829ebd17cd7842f47176e35ebd) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 59989e8ee5dc..9a71ed546b90 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -55,10 +55,9 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, return -ENODEV; /* See the comment at the drm_mm_init() call for more about this check. - * WaSkipStolenMemoryFirstPage:bdw,chv,kbl (incomplete) + * WaSkipStolenMemoryFirstPage:bdw+ (incomplete) */ - if (start < 4096 && (IS_GEN8(dev_priv) || - IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0))) + if (start < 4096 && INTEL_GEN(dev_priv) >= 8) start = 4096; mutex_lock(&dev_priv->mm.stolen_lock); -- GitLab From 8ed8791885aa14d545d8d5429f2be560c19addd2 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 14 Dec 2016 14:26:20 +0200 Subject: [PATCH 0203/1442] drm/i915: Fix setting of boost freq tunable commit b1b7ec985805e005055d1d471ca586a715ffc10a upstream. For limiting the max frequency of gpu, the max freq tunable is not enough to hard limit the max gap. We now have also per client boost max freq. When this tunable was introduced, it was mistakenly made read only. Allow user to gain control by setting it writable. Fixes: 29ecd78d3b79 ("drm/i915: Define a separate variable and control for RPS waitboost frequency") Cc: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1481718380-9170-1-git-send-email-mika.kuoppala@intel.com (cherry picked from commit 73a798711314b54cbd4fe224e24db92c306a8d8c) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 1012eeea1324..306fc54c161b 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -460,7 +460,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL); -static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); +static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO | S_IWUSR, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store); static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store); -- GitLab From 6f2fabab8d63908940450fa27136516323cc3ae9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2017 08:32:38 +0100 Subject: [PATCH 0204/1442] Linux 4.9.2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ab3cd5128889..c9ce897465c5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From cf49219364fd1529567fba45b22eee98b4f0e6c5 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 16 Nov 2016 22:15:28 +0100 Subject: [PATCH 0205/1442] iio: common: st_sensors: fix channel data parsing commit 65c8aea07de11b6507efa175edb44bd8b4488218 upstream. Using realbits as i2c/spi read len, when that value is not byte aligned (e.g 12 bits), lead to skip msb part of out data registers. Fix this taking into account scan_type.shift in addition to scan_type.realbits as read length: read_len = DIV_ROUND_UP(realbits + shift, 8) This fix has been tested on 8, 12, 16, 24 bit sensors Fixes: e7385de5291e ("iio:st_sensors: align on storagebits boundaries") Signed-off-by: Lorenzo Bianconi Tested-by: Linus Walleij Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/common/st_sensors/st_sensors_buffer.c | 4 +++- drivers/iio/common/st_sensors/st_sensors_core.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index fe7775bb3740..df4045203a07 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -30,7 +30,9 @@ static int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf) for_each_set_bit(i, indio_dev->active_scan_mask, num_data_channels) { const struct iio_chan_spec *channel = &indio_dev->channels[i]; - unsigned int bytes_to_read = channel->scan_type.realbits >> 3; + unsigned int bytes_to_read = + DIV_ROUND_UP(channel->scan_type.realbits + + channel->scan_type.shift, 8); unsigned int storage_bytes = channel->scan_type.storagebits >> 3; diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 975a1f19f747..d5cf7f31eaf9 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -483,8 +483,10 @@ static int st_sensors_read_axis_data(struct iio_dev *indio_dev, int err; u8 *outdata; struct st_sensor_data *sdata = iio_priv(indio_dev); - unsigned int byte_for_channel = ch->scan_type.realbits >> 3; + unsigned int byte_for_channel; + byte_for_channel = DIV_ROUND_UP(ch->scan_type.realbits + + ch->scan_type.shift, 8); outdata = kmalloc(byte_for_channel, GFP_KERNEL); if (!outdata) return -ENOMEM; -- GitLab From 7090b8da3836ea69c79da8d71141a318269c26a8 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 29 Dec 2016 02:16:36 +0900 Subject: [PATCH 0206/1442] iio: max44000: correct value in illuminance_integration_time_available commit b4e8a0eb718749455601fa7b283febc42cca8957 upstream. According to the datasheet, the shortest available integration time for ALS ADC conversion is 1.5625ms but illuminance_integration_time_available sysfs file shows wrong value. Cc: Crestez Dan Leonard Cc: Jonathan Cameron Cc: Hartmut Knaack Cc: Lars-Peter Clausen Cc: Peter Meerwald-Stadler Signed-off-by: Akinobu Mita Fixes: d5d8f49b6 ("max44000: Expose ambient sensor scaling") Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/max44000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c index 6511b20a2a29..a8ffa432bf0d 100644 --- a/drivers/iio/light/max44000.c +++ b/drivers/iio/light/max44000.c @@ -113,7 +113,7 @@ static const char max44000_int_time_avail_str[] = "0.100 " "0.025 " "0.00625 " - "0.001625"; + "0.0015625"; /* Available scales (internal to ulux) with pretty manual alignment: */ static const int max44000_scale_avail_ulux_array[] = { -- GitLab From ea991c8354c3dab7f7fe6c3a747bc473b9c935e2 Mon Sep 17 00:00:00 2001 From: Marcin Niestroj Date: Thu, 8 Dec 2016 15:22:58 +0100 Subject: [PATCH 0207/1442] iio: bmi160: Fix time needed to sleep after command execution commit 01d1f7a99e457952aa51849ed7c1cc4ced7bca4b upstream. Datasheet specifies typical and maximum execution times for which CMD register is occupied after previous command execution. We took these values as minimum and maximum time for usleep_range() call before making a new command execution. To be sure, that the CMD register is no longer occupied we need to wait *at least* the maximum time specified by datasheet. Signed-off-by: Marcin Niestroj Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/bmi160/bmi160_core.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index e0251b8c1a52..5fb571d03153 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -66,10 +66,8 @@ #define BMI160_REG_DUMMY 0x7F -#define BMI160_ACCEL_PMU_MIN_USLEEP 3200 -#define BMI160_ACCEL_PMU_MAX_USLEEP 3800 -#define BMI160_GYRO_PMU_MIN_USLEEP 55000 -#define BMI160_GYRO_PMU_MAX_USLEEP 80000 +#define BMI160_ACCEL_PMU_MIN_USLEEP 3800 +#define BMI160_GYRO_PMU_MIN_USLEEP 80000 #define BMI160_SOFTRESET_USLEEP 1000 #define BMI160_CHANNEL(_type, _axis, _index) { \ @@ -151,20 +149,9 @@ static struct bmi160_regs bmi160_regs[] = { }, }; -struct bmi160_pmu_time { - unsigned long min; - unsigned long max; -}; - -static struct bmi160_pmu_time bmi160_pmu_time[] = { - [BMI160_ACCEL] = { - .min = BMI160_ACCEL_PMU_MIN_USLEEP, - .max = BMI160_ACCEL_PMU_MAX_USLEEP - }, - [BMI160_GYRO] = { - .min = BMI160_GYRO_PMU_MIN_USLEEP, - .max = BMI160_GYRO_PMU_MIN_USLEEP, - }, +static unsigned long bmi160_pmu_time[] = { + [BMI160_ACCEL] = BMI160_ACCEL_PMU_MIN_USLEEP, + [BMI160_GYRO] = BMI160_GYRO_PMU_MIN_USLEEP, }; struct bmi160_scale { @@ -289,7 +276,7 @@ int bmi160_set_mode(struct bmi160_data *data, enum bmi160_sensor_type t, if (ret < 0) return ret; - usleep_range(bmi160_pmu_time[t].min, bmi160_pmu_time[t].max); + usleep_range(bmi160_pmu_time[t], bmi160_pmu_time[t] + 1000); return 0; } -- GitLab From 755259ba2a3a40f67d36325a1b34d8f0a1d63894 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 14 Dec 2016 17:13:24 -0800 Subject: [PATCH 0208/1442] staging: octeon: Call SET_NETDEV_DEV() commit e7c9a3d9e432200fd4c17855c2c23ac784d6e833 upstream. The Octeon driver calls into PHYLIB which now checks for net_device->dev.parent, so make sure we do set it before calling into any MDIO/PHYLIB related function. Fixes: ec988ad78ed6 ("phy: Don't increment MDIO bus refcount unless it's a different owner") Reported-by: Aaro Koskinen Signed-off-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index d02e3e31ed29..12354440a334 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -776,6 +776,7 @@ static int cvm_oct_probe(struct platform_device *pdev) /* Initialize the device private structure. */ struct octeon_ethernet *priv = netdev_priv(dev); + SET_NETDEV_DEV(dev, &pdev->dev); dev->netdev_ops = &cvm_oct_pow_netdev_ops; priv->imode = CVMX_HELPER_INTERFACE_MODE_DISABLED; priv->port = CVMX_PIP_NUM_INPUT_PORTS; @@ -820,6 +821,7 @@ static int cvm_oct_probe(struct platform_device *pdev) } /* Initialize the device private structure. */ + SET_NETDEV_DEV(dev, &pdev->dev); priv = netdev_priv(dev); priv->netdev = dev; priv->of_node = cvm_oct_node_for_port(pip, interface, -- GitLab From 71c88fc3bde7582c551aaa0a2db0aefb3c98aa62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Dec 2016 16:20:36 +0100 Subject: [PATCH 0209/1442] ALSA: hda - Fix up GPIO for ASUS ROG Ranger commit 85bcf96caba8b4a7c0805555638629ba3c67ea0c upstream. ASUS ROG Ranger VIII with ALC1150 codec requires the extra GPIO pin to up for the front panel. Just use the existing fixup for setting up the GPIO pins. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=189411 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3f75d1b83bf2..52c93fc4876a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2230,6 +2230,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1971, "Asus W2JC", ALC882_FIXUP_ASUS_W2JC), SND_PCI_QUIRK(0x1043, 0x835f, "Asus Eee 1601", ALC888_FIXUP_EEE1601), SND_PCI_QUIRK(0x1043, 0x84bc, "ASUS ET2700", ALC887_FIXUP_ASUS_BASS), + SND_PCI_QUIRK(0x1043, 0x8691, "ASUS ROG Ranger VIII", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x104d, 0x9047, "Sony Vaio TT", ALC889_FIXUP_VAIO_TT), SND_PCI_QUIRK(0x104d, 0x905a, "Sony Vaio Z", ALC882_FIXUP_NO_PRIMARY_HP), SND_PCI_QUIRK(0x104d, 0x9043, "Sony Vaio VGC-LN51JGB", ALC882_FIXUP_NO_PRIMARY_HP), -- GitLab From 5b2c3cafcafc38a9cdf789d7d3a515eae88bdb3b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Jan 2017 21:38:16 +0100 Subject: [PATCH 0210/1442] ALSA: hda - Apply asus-mode8 fixup to ASUS X71SL commit c7efff9284dfde95a11aaa811c9d8ec8167f0f6e upstream. Although the old quirk table showed ASUS X71SL with ALC663 codec being compatible with asus-mode3 fixup, the bugzilla reporter explained that asus-model8 fits better for the dual headphone controls. So be it. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=191781 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 52c93fc4876a..758ac86a1d3a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6944,6 +6944,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x1963, "ASUS X71SL", ALC662_FIXUP_ASUS_MODE8), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), -- GitLab From be4e3aec56679ecf0c7d4130c976c838a4855ef8 Mon Sep 17 00:00:00 2001 From: Ioan-Adrian Ratiu Date: Thu, 5 Jan 2017 00:37:46 +0200 Subject: [PATCH 0211/1442] ALSA: usb-audio: Fix irq/process data synchronization commit 1d0f953086f090a022f2c0e1448300c15372db46 upstream. Commit 16200948d83 ("ALSA: usb-audio: Fix race at stopping the stream") was incomplete causing another more severe kernel panic, so it got reverted. This fixes both the original problem and its fallout kernel race/crash. The original fix is to move the endpoint member NULL clearing logic inside wait_clear_urbs() so the irq triggering the urb completion doesn't call retire_capture/playback_urb() after the NULL clearing and generate a panic. However this creates a new race between snd_usb_endpoint_start()'s call to wait_clear_urbs() and the irq urb completion handler which again calls retire_capture/playback_urb() leading to a new NULL dereference. We keep the EP deactivation code in snd_usb_endpoint_start() because removing it will break the EP reference counting (see [1] [2] for info), however we don't need the "can_sleep" mechanism anymore because a new function was introduced (snd_usb_endpoint_sync_pending_stop()) which synchronizes pending stops and gets called inside the pcm prepare callback. It also makes sense to remove can_sleep because it was also removed from deactivate_urbs() signature in [3] so we benefit from more simplification. [1] commit 015618b90 ("ALSA: snd-usb: Fix URB cancellation at stream start") [2] commit e9ba389c5 ("ALSA: usb-audio: Fix scheduling-while-atomic bug in PCM capture stream") [3] commit ccc1696d5 ("ALSA: usb-audio: simplify endpoint deactivation code") Fixes: f8114f8583bb ("Revert "ALSA: usb-audio: Fix race at stopping the stream"") Signed-off-by: Ioan-Adrian Ratiu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/endpoint.c | 17 +++++++---------- sound/usb/endpoint.h | 2 +- sound/usb/pcm.c | 10 +++++----- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index c470251cea4b..c5251aaad844 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -534,6 +534,11 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep) alive, ep->ep_num); clear_bit(EP_FLAG_STOPPING, &ep->flags); + ep->data_subs = NULL; + ep->sync_slave = NULL; + ep->retire_data_urb = NULL; + ep->prepare_data_urb = NULL; + return 0; } @@ -898,9 +903,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, /** * snd_usb_endpoint_start: start an snd_usb_endpoint * - * @ep: the endpoint to start - * @can_sleep: flag indicating whether the operation is executed in - * non-atomic context + * @ep: the endpoint to start * * A call to this function will increment the use count of the endpoint. * In case it is not already running, the URBs for this endpoint will be @@ -910,7 +913,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, * * Returns an error if the URB submission failed, 0 in all other cases. */ -int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep) +int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) { int err; unsigned int i; @@ -924,8 +927,6 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep) /* just to be sure */ deactivate_urbs(ep, false); - if (can_sleep) - wait_clear_urbs(ep); ep->active_mask = 0; ep->unlink_mask = 0; @@ -1006,10 +1007,6 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep) if (--ep->use_count == 0) { deactivate_urbs(ep, false); - ep->data_subs = NULL; - ep->sync_slave = NULL; - ep->retire_data_urb = NULL; - ep->prepare_data_urb = NULL; set_bit(EP_FLAG_STOPPING, &ep->flags); } } diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h index 6428392d8f62..584f295d7c77 100644 --- a/sound/usb/endpoint.h +++ b/sound/usb/endpoint.h @@ -18,7 +18,7 @@ int snd_usb_endpoint_set_params(struct snd_usb_endpoint *ep, struct audioformat *fmt, struct snd_usb_endpoint *sync_ep); -int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, bool can_sleep); +int snd_usb_endpoint_start(struct snd_usb_endpoint *ep); void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep); void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep); int snd_usb_endpoint_activate(struct snd_usb_endpoint *ep); diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 44d178ee9177..48afae053c56 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -218,7 +218,7 @@ int snd_usb_init_pitch(struct snd_usb_audio *chip, int iface, } } -static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) +static int start_endpoints(struct snd_usb_substream *subs) { int err; @@ -231,7 +231,7 @@ static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) dev_dbg(&subs->dev->dev, "Starting data EP @%p\n", ep); ep->data_subs = subs; - err = snd_usb_endpoint_start(ep, can_sleep); + err = snd_usb_endpoint_start(ep); if (err < 0) { clear_bit(SUBSTREAM_FLAG_DATA_EP_STARTED, &subs->flags); return err; @@ -260,7 +260,7 @@ static int start_endpoints(struct snd_usb_substream *subs, bool can_sleep) dev_dbg(&subs->dev->dev, "Starting sync EP @%p\n", ep); ep->sync_slave = subs->data_endpoint; - err = snd_usb_endpoint_start(ep, can_sleep); + err = snd_usb_endpoint_start(ep); if (err < 0) { clear_bit(SUBSTREAM_FLAG_SYNC_EP_STARTED, &subs->flags); return err; @@ -839,7 +839,7 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) /* for playback, submit the URBs now; otherwise, the first hwptr_done * updates for all URBs would happen at the same time when starting */ if (subs->direction == SNDRV_PCM_STREAM_PLAYBACK) - ret = start_endpoints(subs, true); + ret = start_endpoints(subs); unlock: snd_usb_unlock_shutdown(subs->stream->chip); @@ -1655,7 +1655,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream switch (cmd) { case SNDRV_PCM_TRIGGER_START: - err = start_endpoints(subs, false); + err = start_endpoints(subs); if (err < 0) return err; -- GitLab From d259b685373231b8c85f95633e0a702352077ef6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 Dec 2016 14:20:13 -0800 Subject: [PATCH 0212/1442] fscrypt: fix renaming and linking special files commit 42d97eb0ade31e1bc537d086842f5d6e766d9d51 upstream. Attempting to link a device node, named pipe, or socket file into an encrypted directory through rename(2) or link(2) always failed with EPERM. This happened because fscrypt_has_permitted_context() saw that the file was unencrypted and forbid creating the link. This behavior was unexpected because such files are never encrypted; only regular files, directories, and symlinks can be encrypted. To fix this, make fscrypt_has_permitted_context() always return true on special files. This will be covered by a test in my encryption xfstests patchset. Fixes: 9bd8212f981e ("ext4 crypto: add encryption policy and password salt support") Signed-off-by: Eric Biggers Reviewed-by: Richard Weinberger Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/crypto/policy.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 6865663aac69..abc18847b98d 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -171,6 +171,11 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) BUG_ON(1); } + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + /* no restrictions if the parent directory is not encrypted */ if (!parent->i_sb->s_cop->is_encrypted(parent)) return 1; -- GitLab From e569eef6298adbcc98ca029cc6763b1d854b1bed Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 2 Jan 2017 17:43:15 +0100 Subject: [PATCH 0213/1442] parisc: Add line-break when printing segfault info commit b4a9eb4cd5966c8aad3d007d206a2cbda97d6928 upstream. Add a leading line break else printed line gets too long. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 8ff9253930af..1a0b4f63f0e9 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -234,7 +234,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long code, tsk->comm, code, address); print_vma_addr(KERN_CONT " in ", regs->iaoq[0]); - pr_cont(" trap #%lu: %s%c", code, trap_name(code), + pr_cont("\ntrap #%lu: %s%c", code, trap_name(code), vma ? ',':'\n'); if (vma) -- GitLab From 0c8033357b1d8fc4bba43cf1f87d2f61f3dd3fd2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 26 Dec 2016 12:46:01 +0100 Subject: [PATCH 0214/1442] parisc: Mark cr16 clocksource unstable on SMP systems commit 41744213602a206f24adcb4a2b7551db3c700e72 upstream. The cr16 interval timer of each CPU is not syncronized to other cr16 timers in other CPUs in a SMP system. So, delay the registration of the cr16 clocksource until all CPUs have been detected and then - if we are on a SMP machine - mark the cr16 clocksource as unstable and lower it's rating before registering it at the clocksource framework. This patch fixes the stalled CPU warnings which we have seen since introduction of the cr16 clocksource. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/time.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 325f30d82b64..47ef8fdcd382 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -289,9 +289,26 @@ void __init time_init(void) cr16_hz = 100 * PAGE0->mem_10msec; /* Hz */ - /* register at clocksource framework */ - clocksource_register_hz(&clocksource_cr16, cr16_hz); - /* register as sched_clock source */ sched_clock_register(read_cr16_sched_clock, BITS_PER_LONG, cr16_hz); } + +static int __init init_cr16_clocksource(void) +{ + /* + * The cr16 interval timers are not syncronized across CPUs, so mark + * them unstable and lower rating on SMP systems. + */ + if (num_online_cpus() > 1) { + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; + } + + /* register at clocksource framework */ + clocksource_register_hz(&clocksource_cr16, + 100 * PAGE0->mem_10msec); + + return 0; +} + +device_initcall(init_cr16_clocksource); -- GitLab From 5f390df2baf73050b7af522ded2b2e070d21caa2 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 27 Dec 2016 08:57:59 -0800 Subject: [PATCH 0215/1442] HID: sensor-hub: Move the memset to sensor_hub_get_feature() commit 143fca77cce906d35f7a60ccef648e888df589f2 upstream. While applying patch d443a0aa3a29: "HID: hid-sensor-hub: clear memory to avoid random data", there was some issues in applying correct version of the patch. This resulted in the breakage of sensor functions as all request like power-up will be reset by the memset() in the function sensor_hub_set_feature(). The reset of caller buffer should be in the function sensor_hub_get_feature(), not in the sensor_hub_set_feature(). Fixes: d443a0aa3a29 ("HID: hid-sensor-hub: clear memory to avoid random data") Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-sensor-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 60875625cbdf..8f6c35370f66 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -212,7 +212,6 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, __s32 value; int ret = 0; - memset(buffer, 0, buffer_size); mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield)) { @@ -256,6 +255,8 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, int buffer_index = 0; int i; + memset(buffer, 0, buffer_size); + mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); if (!report || (field_index >= report->maxfield) || -- GitLab From c7b1b7951d9d08a7b9ec19354fa8f33b5d1e9ddb Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 7 Dec 2016 16:22:16 +0100 Subject: [PATCH 0216/1442] ARM: davinci: da850: don't add emac clock to lookup table twice commit ef37427ac5677331145ab27a17e6f5f1b43f0c11 upstream. Similarly to the aemif clock - this screws up the linked list of clock children. Create a separate clock for mdio inheriting the rate from emac_clk. Signed-off-by: Bartosz Golaszewski [nsekhar@ti.com: add a comment over mdio_clk to explaing its existence + commit headline updates] Signed-off-by: Sekhar Nori Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-davinci/da850.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index ed3d0e9f72ac..d7a43afdfac0 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -319,6 +319,16 @@ static struct clk emac_clk = { .gpsc = 1, }; +/* + * In order to avoid adding the emac_clk to the clock lookup table twice (and + * screwing up the linked list in the process) create a separate clock for + * mdio inheriting the rate from emac_clk. + */ +static struct clk mdio_clk = { + .name = "mdio", + .parent = &emac_clk, +}; + static struct clk mcasp_clk = { .name = "mcasp", .parent = &async3_clk, @@ -494,7 +504,7 @@ static struct clk_lookup da850_clks[] = { CLK(NULL, "arm", &arm_clk), CLK(NULL, "rmii", &rmii_clk), CLK("davinci_emac.1", NULL, &emac_clk), - CLK("davinci_mdio.0", "fck", &emac_clk), + CLK("davinci_mdio.0", "fck", &mdio_clk), CLK("davinci-mcasp.0", NULL, &mcasp_clk), CLK("davinci-mcbsp.0", NULL, &mcbsp0_clk), CLK("davinci-mcbsp.1", NULL, &mcbsp1_clk), -- GitLab From 3c4eef31670361212d4f762c9420fa9a4ff160c0 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 8 Dec 2016 17:31:14 +0530 Subject: [PATCH 0217/1442] pinctrl/amd: Set the level based on ACPI tables commit 2983f296f2327bc517e3b29344fce82271160197 upstream. In the function amd_gpio_irq_set_type, read the values from the ACPI table to set the level and drop the settings passed by the client. Reviewed-by: Pankaj Sen Reviewed-by: Nitesh Kumar Agrawal Reviewed-by: Shah, Nehal-bakulchandra Signed-off-by: Shyam-sundar S-k Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index aea310a91821..c9a146948192 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -382,26 +382,21 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) { int ret = 0; u32 pin_reg; - unsigned long flags; - bool level_trig; - u32 active_level; + unsigned long flags, irq_flags; struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct amd_gpio *gpio_dev = gpiochip_get_data(gc); spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); - /* - * When level_trig is set EDGE and active_level is set HIGH in BIOS - * default settings, ignore incoming settings from client and use - * BIOS settings to configure GPIO register. + /* Ignore the settings coming from the client and + * read the values from the ACPI tables + * while setting the trigger type */ - level_trig = !(pin_reg & (LEVEL_TRIGGER << LEVEL_TRIG_OFF)); - active_level = pin_reg & (ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF); - if(level_trig && - ((active_level >> ACTIVE_LEVEL_OFF) == ACTIVE_HIGH)) - type = IRQ_TYPE_EDGE_FALLING; + irq_flags = irq_get_trigger_type(d->irq); + if (irq_flags != IRQ_TYPE_NONE) + type = irq_flags; switch (type & IRQ_TYPE_SENSE_MASK) { case IRQ_TYPE_EDGE_RISING: -- GitLab From 5e7598a625eead50e6d783797de828fe22e86f25 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 2 Jan 2017 11:19:29 +0100 Subject: [PATCH 0218/1442] mac80211: initialize fast-xmit 'info' later commit 35f432a03e41d3bf08c51ede917f94e2288fbe8c upstream. In ieee80211_xmit_fast(), 'info' is initialized to point to the skb that's passed in, but that skb may later be replaced by a clone (if it was shared), leading to an invalid pointer. This can lead to use-after-free and also later crashes since the real SKB's info->hw_queue doesn't get initialized properly. Fix this by assigning info only later, when it's needed, after the skb replacement (may have) happened. Reported-by: Ben Greear Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bd5f4be89435..dd190ff3daea 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3262,7 +3262,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); int hw_headroom = sdata->local->hw.extra_tx_headroom; struct ethhdr eth; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; struct ieee80211_tx_data tx; ieee80211_tx_result r; @@ -3326,6 +3326,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); info->band = fast_tx->band; info->control.vif = &sdata->vif; -- GitLab From 8c775affbbd69486588ee3105e7fe357874d33fb Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 3 Jan 2017 13:49:42 +0100 Subject: [PATCH 0219/1442] asm-prototypes: Clear any CPP defines before declaring the functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c7858bf16c0b2cc62f475f31e6df28c3a68da1d6 upstream. The asm-prototypes.h file is used to provide dummy function declarations for genksyms, when processing asm files with EXPORT_SYMBOL. Make sure that any architecture defines get out of our way. x86 currently has an issue with memcpy on 64bit with CONFIG_KMEMCHECK=y and with memset/__memset on 32bit: $ cat init/test.c #include $ make -s init/test.o In file included from ./arch/x86/include/asm/string.h:4:0, from ./include/linux/string.h:18, from ./include/linux/bitmap.h:8, from ./include/linux/cpumask.h:11, from ./arch/x86/include/asm/cpumask.h:4, from ./arch/x86/include/asm/msr.h:10, from ./arch/x86/include/asm/processor.h:20, from ./arch/x86/include/asm/cpufeature.h:4, from ./arch/x86/include/asm/thread_info.h:52, from ./include/linux/thread_info.h:25, from ./arch/x86/include/asm/preempt.h:6, from ./include/linux/preempt.h:59, from ./include/linux/spinlock.h:50, from ./include/linux/seqlock.h:35, from ./include/linux/time.h:5, from ./include/uapi/linux/timex.h:56, from ./include/linux/timex.h:56, from ./include/linux/sched.h:19, from ./include/linux/uaccess.h:4, from ./arch/x86/include/asm/asm-prototypes.h:2, from init/test.c:1: ./arch/x86/include/asm/string_64.h:52:47: error: expected declaration specifiers or ‘...’ before ‘(’ token #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) ./include/asm-generic/asm-prototypes.h:6:14: note: in expansion of macro ‘memcpy’ extern void *memcpy(void *, const void *, __kernel_size_t); ^ ... During real build, this manifests itself by genksyms segfaulting. Fixes: 334bb7738764 ("x86/kbuild: enable modversions for symbols exported from asm") Reported-and-tested-by: Borislav Petkov Cc: Adam Borowski Signed-off-by: Michal Marek Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/asm-prototypes.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h index df13637e4017..939869c772b1 100644 --- a/include/asm-generic/asm-prototypes.h +++ b/include/asm-generic/asm-prototypes.h @@ -1,7 +1,13 @@ #include +#undef __memset extern void *__memset(void *, int, __kernel_size_t); +#undef __memcpy extern void *__memcpy(void *, const void *, __kernel_size_t); +#undef __memmove extern void *__memmove(void *, const void *, __kernel_size_t); +#undef memset extern void *memset(void *, int, __kernel_size_t); +#undef memcpy extern void *memcpy(void *, const void *, __kernel_size_t); +#undef memmove extern void *memmove(void *, const void *, __kernel_size_t); -- GitLab From 8f8157c2a72af9cbd0e868ef64b90565b818a445 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Dec 2016 11:36:06 -0800 Subject: [PATCH 0220/1442] gcc-plugins: update gcc-common.h for gcc-7 commit 81d873a87114b05dbb74d1fbf0c4322ba4bfdee4 upstream. This updates gcc-common.h from Emese Revfy for gcc 7. This fixes issues seen by Kugan and Arnd. Build tested with gcc 5.4 and 7 snapshot. Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/gcc-common.h | 85 ++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 950fd2e64bb7..12262c0cc691 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -39,6 +39,9 @@ #include "hash-map.h" #endif +#if BUILDING_GCC_VERSION >= 7000 +#include "memmodel.h" +#endif #include "emit-rtl.h" #include "debug.h" #include "target.h" @@ -91,6 +94,9 @@ #include "tree-ssa-alias.h" #include "tree-ssa.h" #include "stringpool.h" +#if BUILDING_GCC_VERSION >= 7000 +#include "tree-vrp.h" +#endif #include "tree-ssanames.h" #include "print-tree.h" #include "tree-eh.h" @@ -287,6 +293,22 @@ static inline struct cgraph_node *cgraph_next_function_with_gimple_body(struct c return NULL; } +static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) +{ + cgraph_node_ptr alias; + + if (callback(node, data)) + return true; + + for (alias = node->same_body; alias; alias = alias->next) { + if (include_overwritable || cgraph_function_body_availability(alias) > AVAIL_OVERWRITABLE) + if (cgraph_for_node_and_aliases(alias, callback, data, include_overwritable)) + return true; + } + + return false; +} + #define FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) \ for ((node) = cgraph_first_function_with_gimple_body(); (node); \ (node) = cgraph_next_function_with_gimple_body(node)) @@ -399,6 +421,7 @@ typedef union gimple_statement_d gassign; typedef union gimple_statement_d gcall; typedef union gimple_statement_d gcond; typedef union gimple_statement_d gdebug; +typedef union gimple_statement_d ggoto; typedef union gimple_statement_d gphi; typedef union gimple_statement_d greturn; @@ -452,6 +475,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt) return stmt; } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + static inline gphi *as_a_gphi(gimple stmt) { return stmt; @@ -496,6 +529,14 @@ static inline const greturn *as_a_const_greturn(const_gimple stmt) typedef struct rtx_def rtx_insn; +static inline const char *get_decl_section_name(const_tree decl) +{ + if (DECL_SECTION_NAME(decl) == NULL_TREE) + return NULL; + + return TREE_STRING_POINTER(DECL_SECTION_NAME(decl)); +} + static inline void set_decl_section_name(tree node, const char *value) { if (value) @@ -511,6 +552,7 @@ typedef struct gimple_statement_base gassign; typedef struct gimple_statement_call gcall; typedef struct gimple_statement_base gcond; typedef struct gimple_statement_base gdebug; +typedef struct gimple_statement_base ggoto; typedef struct gimple_statement_phi gphi; typedef struct gimple_statement_base greturn; @@ -564,6 +606,16 @@ static inline const gdebug *as_a_const_gdebug(const_gimple stmt) return stmt; } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return stmt; +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return stmt; +} + static inline gphi *as_a_gphi(gimple stmt) { return as_a(stmt); @@ -611,6 +663,11 @@ inline bool is_a_helper::test(const_gimple gs) #define INSN_DELETED_P(insn) (insn)->deleted() +static inline const char *get_decl_section_name(const_tree decl) +{ + return DECL_SECTION_NAME(decl); +} + /* symtab/cgraph related */ #define debug_cgraph_node(node) (node)->debug() #define cgraph_get_node(decl) cgraph_node::get(decl) @@ -619,6 +676,7 @@ inline bool is_a_helper::test(const_gimple gs) #define cgraph_n_nodes symtab->cgraph_count #define cgraph_max_uid symtab->cgraph_max_uid #define varpool_get_node(decl) varpool_node::get(decl) +#define dump_varpool_node(file, node) (node)->dump(file) #define cgraph_create_edge(caller, callee, call_stmt, count, freq, nest) \ (caller)->create_edge((callee), (call_stmt), (count), (freq)) @@ -674,6 +732,11 @@ static inline cgraph_node_ptr cgraph_alias_target(cgraph_node_ptr node) return node->get_alias_target(); } +static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) +{ + return node->call_for_symbol_thunks_and_aliases(callback, data, include_overwritable); +} + static inline struct cgraph_node_hook_list *cgraph_add_function_insertion_hook(cgraph_node_hook hook, void *data) { return symtab->add_cgraph_insertion_hook(hook, data); @@ -729,6 +792,13 @@ static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree l return gimple_build_assign(lhs, subcode, op1, op2 PASS_MEM_STAT); } +template <> +template <> +inline bool is_a_helper::test(const_gimple gs) +{ + return gs->code == GIMPLE_GOTO; +} + template <> template <> inline bool is_a_helper::test(const_gimple gs) @@ -766,6 +836,16 @@ static inline const gcall *as_a_const_gcall(const_gimple stmt) return as_a(stmt); } +static inline ggoto *as_a_ggoto(gimple stmt) +{ + return as_a(stmt); +} + +static inline const ggoto *as_a_const_ggoto(const_gimple stmt) +{ + return as_a(stmt); +} + static inline gphi *as_a_gphi(gimple stmt) { return as_a(stmt); @@ -828,4 +908,9 @@ static inline void debug_gimple_stmt(const_gimple s) #define debug_gimple_stmt(s) debug_gimple_stmt(CONST_CAST_GIMPLE(s)) #endif +#if BUILDING_GCC_VERSION >= 7000 +#define get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep, keep_aligning) \ + get_inner_reference(exp, pbitsize, pbitpos, poffset, pmode, punsignedp, preversep, pvolatilep) +#endif + #endif -- GitLab From 5652dd3f005d7947c588ced586e09c48d68be7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 7 Dec 2016 17:56:47 +0000 Subject: [PATCH 0221/1442] drm/i915: Fix oopses in the overlay code due to i915_gem_active stuff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b72eb5ffa6d8601d9ba72619d75fb5b27723743a upstream. The i915_gem_active stuff doesn't like a NULL ->retire hook, but the overlay code can set it to NULL. That obviously ends up oopsing. Fix it by introducing a new helper to assign the retirement callback that will switch out the NULL function pointer with i915_gem_retire_noop. Cc: Chris Wilson Cc: Joonas Lahtinen Fixes: 0d9bdd886f29 ("drm/i915: Convert intel_overlay to request tracking") Signed-off-by: Ville Syrjälä Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20161207175647.10018-1-chris@chris-wilson.co.uk (cherry picked from commit ecd9caa0522db5a6b03ac8858c42067ef9d8323b) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_request.h | 19 +++++++++++++++++++ drivers/gpu/drm/i915/intel_overlay.c | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 974bd7bcc801..59ac90025552 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -344,6 +344,25 @@ i915_gem_active_set(struct i915_gem_active *active, rcu_assign_pointer(active->request, request); } +/** + * i915_gem_active_set_retire_fn - updates the retirement callback + * @active - the active tracker + * @fn - the routine called when the request is retired + * @mutex - struct_mutex used to guard retirements + * + * i915_gem_active_set_retire_fn() updates the function pointer that + * is called when the final request associated with the @active tracker + * is retired. + */ +static inline void +i915_gem_active_set_retire_fn(struct i915_gem_active *active, + i915_gem_retire_fn fn, + struct mutex *mutex) +{ + lockdep_assert_held(mutex); + active->retire = fn ?: i915_gem_retire_noop; +} + static inline struct drm_i915_gem_request * __i915_gem_active_peek(const struct i915_gem_active *active) { diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index a24bc8c7889f..bcde9f6eecf1 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -216,7 +216,8 @@ static void intel_overlay_submit_request(struct intel_overlay *overlay, { GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, &overlay->i915->drm.struct_mutex)); - overlay->last_flip.retire = retire; + i915_gem_active_set_retire_fn(&overlay->last_flip, retire, + &overlay->i915->drm.struct_mutex); i915_gem_active_set(&overlay->last_flip, req); i915_add_request(req); } -- GitLab From 73d4256359899b239a8bb58546369afebeb4fc40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 7 Dec 2016 19:28:03 +0200 Subject: [PATCH 0222/1442] drm/i915: Fix oops in overlay due to frontbuffer tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9169757ae67bc927750ae907624e65cc15b4fe5a upstream. The vma will be NULL if the overlay was previously off, so dereferencing it will oops. Check for NULL before doing that. Cc: Chris Wilson Cc: Joonas Lahtinen Fixes: 9b3b7841b86d ("drm/i915/overlay: Use VMA as the primary tracker for images") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1481131693-27993-2-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 4a15cdbbc55463e55a7cdcf33f84ccc742ca9c29) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_overlay.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index bcde9f6eecf1..42ebd4d21e47 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -840,8 +840,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret) goto out_unpin; - i915_gem_track_fb(overlay->vma->obj, new_bo, - INTEL_FRONTBUFFER_OVERLAY(pipe)); + i915_gem_track_fb(overlay->vma ? overlay->vma->obj : NULL, + vma->obj, INTEL_FRONTBUFFER_OVERLAY(pipe)); overlay->old_vma = overlay->vma; overlay->vma = vma; -- GitLab From f0f7f38e80688776a21e6a91025a4851c06c6ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 20 Dec 2016 18:51:17 +0200 Subject: [PATCH 0223/1442] drm/i915: Force VDD off on the new power seqeuencer before starting to use it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8581f1b5ee0837e55197f036406bc99746ac94b2 upstream. Apparently some VLV BIOSen like to leave the VDD force bit enabled even for power seqeuncers that aren't properly hooked up to any port. That will result in a imbalance in the AUX power domain refcount when we stat to use said power sequencer as edp_panel_vdd_on() will not grab the power domain reference if it sees that the VDD is already on. To fix this let's make sure we turn off the VDD force bit when we initialize the power sequencer registers. That is, unless it's being done from the init path since there we are actually initializing the registers for the current power sequencer and we don't want to turn VDD off needlessly as that would require waiting for the power cycle delay before we turn it back on. This fixes the following kind of warnings: WARNING: CPU: 0 PID: 123 at ../drivers/gpu/drm/i915/intel_runtime_pm.c:1455 intel_display_power_put+0x13a/0x170 [i915]() WARN_ON(!power_domains->domain_use_count[domain]) ... v2: Fix typos in comment (David) Cc: Matwey V. Kornilov Tested-by: Matwey V. Kornilov Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98695 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161220165117.24801-1-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall (cherry picked from commit 5d5ab2d26f32bdaa5872b938658e0bf8d341bc4c) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 41 +++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index bf344d08356a..13ae67cc6b8a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -280,7 +280,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, struct intel_dp *intel_dp); static void intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp); + struct intel_dp *intel_dp, + bool force_disable_vdd); static void intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp); @@ -442,7 +443,7 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); /* * Even vdd force doesn't work until we've made @@ -479,7 +480,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp) * Only the HW needs to be reprogrammed, the SW state is fixed and * has been setup during connector init. */ - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); return 0; } @@ -562,7 +563,7 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) port_name(port), pipe_name(intel_dp->pps_pipe)); intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); } void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) @@ -2924,7 +2925,7 @@ static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp) /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); } static void vlv_pre_enable_dp(struct intel_encoder *encoder, @@ -5054,7 +5055,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, static void intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp) + struct intel_dp *intel_dp, + bool force_disable_vdd) { struct drm_i915_private *dev_priv = to_i915(dev); u32 pp_on, pp_off, pp_div, port_sel = 0; @@ -5067,6 +5069,31 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, intel_pps_get_registers(dev_priv, intel_dp, ®s); + /* + * On some VLV machines the BIOS can leave the VDD + * enabled even on power seqeuencers which aren't + * hooked up to any port. This would mess up the + * power domain tracking the first time we pick + * one of these power sequencers for use since + * edp_panel_vdd_on() would notice that the VDD was + * already on and therefore wouldn't grab the power + * domain reference. Disable VDD first to avoid this. + * This also avoids spuriously turning the VDD on as + * soon as the new power seqeuencer gets initialized. + */ + if (force_disable_vdd) { + u32 pp = ironlake_get_pp_control(intel_dp); + + WARN(pp & PANEL_POWER_ON, "Panel power already on\n"); + + if (pp & EDP_FORCE_VDD) + DRM_DEBUG_KMS("VDD already on, disabling first\n"); + + pp &= ~EDP_FORCE_VDD; + + I915_WRITE(regs.pp_ctrl, pp); + } + pp_on = (seq->t1_t3 << PANEL_POWER_UP_DELAY_SHIFT) | (seq->t8 << PANEL_LIGHT_ON_DELAY_SHIFT); pp_off = (seq->t9 << PANEL_LIGHT_OFF_DELAY_SHIFT) | @@ -5119,7 +5146,7 @@ static void intel_dp_pps_init(struct drm_device *dev, vlv_initial_power_sequencer_setup(intel_dp); } else { intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); } } -- GitLab From fe4fc2d67d0bfa0a1b5178e4f3c0f3ae7b29f737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Dec 2016 16:45:47 +0200 Subject: [PATCH 0224/1442] drm/i915: Initialize overlay->last_flip properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a6d3e7d35d088b2aabad1688b740e17bfdf566c5 upstream. Initialize overlay->last_flip properly instead of leaving it zeroed. Cc: Chris Wilson Fixes: 0d9bdd886f29 ("drm/i915: Convert intel_overlay to request tracking") Reviewed-by: Chris Wilson Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161221144547.27319-1-ville.syrjala@linux.intel.com (cherry picked from commit 330afdb1df0f3fb48583105493a8f4f8d9e3af36) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_overlay.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 42ebd4d21e47..a2655cd5a84e 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1431,6 +1431,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; + init_request_active(&overlay->last_flip, NULL); + regs = intel_overlay_map_regs(overlay); if (!regs) goto out_unpin_bo; -- GitLab From f39969ab0418a9ef7da3f6e571359449b3497d83 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sat, 24 Dec 2016 10:00:42 +0100 Subject: [PATCH 0225/1442] KVM: x86: reset MMU on KVM_SET_VCPU_EVENTS commit 6ef4e07ecd2db21025c446327ecf34414366498b upstream. Otherwise, mismatch between the smm bit in hflags and the MMU role can cause a NULL pointer dereference. Signed-off-by: Xiao Guangrong Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 04c5d96b1d67..f3648c978d2f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3036,6 +3036,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, memset(&events->reserved, 0, sizeof(events->reserved)); } +static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags); + static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { @@ -3072,10 +3074,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { + u32 hflags = vcpu->arch.hflags; if (events->smi.smm) - vcpu->arch.hflags |= HF_SMM_MASK; + hflags |= HF_SMM_MASK; else - vcpu->arch.hflags &= ~HF_SMM_MASK; + hflags &= ~HF_SMM_MASK; + kvm_set_hflags(vcpu, hflags); + vcpu->arch.smi_pending = events->smi.pending; if (events->smi.smm_inside_nmi) vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; -- GitLab From 26a401a6a52f7b838ce65b3b48e314c0a4aaa348 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 3 Jan 2017 17:43:00 +0000 Subject: [PATCH 0226/1442] KVM: MIPS: Don't clobber CP0_Status.UX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4c881451d3017033597ea186cf79ae41a73e1ef8 upstream. On 64-bit kernels, MIPS KVM will clear CP0_Status.UX to prevent the guest (running in user mode) from accessing the 64-bit memory segments. However the previous value of CP0_Status.UX is never restored when exiting from the guest. If the user process uses 64-bit addressing (the n64 ABI) this can result in address error exceptions from the kernel if it needs to deliver a signal before returning to user mode, as the kernel will need to write a sigframe to high user addresses on the user stack which are disallowed by CP0_Status.UX=0. This is fixed by explicitly setting SX and UX again when exiting from the guest, and explicitly clearing those bits when returning to the guest. Having the SX and UX bits set when handling guest exits (rather than only when exiting to userland) will be helpful when we support VZ, since we shouldn't need to directly read or write guest memory, so it will be valid for cache management IPIs to access host user addresses. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/entry.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c index 6a02b3a3fa65..e92fb190e2d6 100644 --- a/arch/mips/kvm/entry.c +++ b/arch/mips/kvm/entry.c @@ -521,6 +521,9 @@ void *kvm_mips_build_exit(void *addr) uasm_i_and(&p, V0, V0, AT); uasm_i_lui(&p, AT, ST0_CU0 >> 16); uasm_i_or(&p, V0, V0, AT); +#ifdef CONFIG_64BIT + uasm_i_ori(&p, V0, V0, ST0_SX | ST0_UX); +#endif uasm_i_mtc0(&p, V0, C0_STATUS); uasm_i_ehb(&p); @@ -643,7 +646,7 @@ static void *kvm_mips_build_ret_to_guest(void *addr) /* Setup status register for running guest in UM */ uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE); - UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX)); + UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX | ST0_SX | ST0_UX)); uasm_i_and(&p, V1, V1, AT); uasm_i_mtc0(&p, V1, C0_STATUS); uasm_i_ehb(&p); -- GitLab From 84fd8feb5bb50508ba336c7eaf2f377e0b241521 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 3 Jan 2017 17:43:01 +0000 Subject: [PATCH 0227/1442] KVM: MIPS: Flush KVM entry code from icache globally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 32eb12a6c11034867401d56b012e3c15d5f8141e upstream. Flush the KVM entry code from the icache on all CPUs, not just the one that built the entry code. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/mips.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 06a60b19acfb..29ec9ab3fd55 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -360,8 +360,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); /* Invalidate the icache for these ranges */ - local_flush_icache_range((unsigned long)gebase, - (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); + flush_icache_range((unsigned long)gebase, + (unsigned long)gebase + ALIGN(size, PAGE_SIZE)); /* * Allocate comm page for guest kernel, a TLB will be reserved for -- GitLab From 5de2dd7f1be18c1ee72c4aa4aeccb191982c1e7c Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 3 Jan 2017 18:13:46 -0600 Subject: [PATCH 0228/1442] usb: musb: core: add clear_ep_rxintr() to musb_platform_ops commit 6def85a396ce7796bd9f4561c6ae8138833f7a52 upstream. During dma teardown for dequque urb, if musb load is high, musb might generate bogus rx ep interrupt even when the rx fifo is flushed. In such case any of the follow log messages could happen. musb_host_rx 1853: BOGUS RX2 ready, csr 0000, count 0 musb_host_rx 1936: RX3 dma busy, csr 2020 As mentioned in the current inline comment, clearing ep interrupt in the teardown path avoids the bogus interrupt. Clearing ep interrupt is platform dependent, so this patch adds a platform callback to allow glue driver to clear the ep interrupt. This bug seems to be existing since the initial driver for musb support, but I only validated the fix back to v4.1, so only cc stable for v4.1+. Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.h | 7 +++++++ drivers/usb/musb/musb_host.c | 10 ++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 91817d77d59c..47331dbdde29 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -216,6 +216,7 @@ struct musb_platform_ops { void (*pre_root_reset_end)(struct musb *musb); void (*post_root_reset_end)(struct musb *musb); int (*phy_callback)(enum musb_vbus_id_status status); + void (*clear_ep_rxintr)(struct musb *musb, int epnum); }; /* @@ -626,4 +627,10 @@ static inline void musb_platform_post_root_reset_end(struct musb *musb) musb->ops->post_root_reset_end(musb); } +static inline void musb_platform_clear_ep_rxintr(struct musb *musb, int epnum) +{ + if (musb->ops->clear_ep_rxintr) + musb->ops->clear_ep_rxintr(musb, epnum); +} + #endif /* __MUSB_CORE_H__ */ diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 53bc4ceefe89..806451418cfe 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2374,12 +2374,11 @@ static int musb_cleanup_urb(struct urb *urb, struct musb_qh *qh) int is_in = usb_pipein(urb->pipe); int status = 0; u16 csr; + struct dma_channel *dma = NULL; musb_ep_select(regs, hw_end); if (is_dma_capable()) { - struct dma_channel *dma; - dma = is_in ? ep->rx_channel : ep->tx_channel; if (dma) { status = ep->musb->dma_controller->channel_abort(dma); @@ -2395,10 +2394,9 @@ static int musb_cleanup_urb(struct urb *urb, struct musb_qh *qh) /* giveback saves bulk toggle */ csr = musb_h_flush_rxfifo(ep, 0); - /* REVISIT we still get an irq; should likely clear the - * endpoint's irq status here to avoid bogus irqs. - * clearing that status is platform-specific... - */ + /* clear the endpoint's irq status here to avoid bogus irqs */ + if (is_dma_capable() && dma) + musb_platform_clear_ep_rxintr(musb, ep->epnum); } else if (ep->epnum) { musb_h_tx_flush_fifo(ep); csr = musb_readw(epio, MUSB_TXCSR); -- GitLab From 4a37dbe68289b5bddb43656385a230d13d56f6dc Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 3 Jan 2017 18:13:47 -0600 Subject: [PATCH 0229/1442] usb: musb: dsps: implement clear_ep_rxintr() callback commit c48400baa02155a5ddad63e8554602e48782278c upstream. During dma teardown for dequque urb, if musb load is high, musb might generate bogus rx ep interrupt even when the rx fifo is flushed. In such case any of the follow log messages could happen. musb_host_rx 1853: BOGUS RX2 ready, csr 0000, count 0 musb_host_rx 1936: RX3 dma busy, csr 2020 As mentioned in the current inline comment, clearing ep interrupt in the teardown path avoids the bogus interrupt, so implement clear_ep_rxintr() callback. This bug seems to be existing since the initial driver for musb support, but I only validated the fix back to v4.1, so only cc stable for v4.1+. Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index feae1561b9ab..9f125e179acd 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -267,6 +267,17 @@ static void otg_timer(unsigned long _musb) pm_runtime_put_autosuspend(dev); } +void dsps_musb_clear_ep_rxintr(struct musb *musb, int epnum) +{ + u32 epintr; + struct dsps_glue *glue = dev_get_drvdata(musb->controller->parent); + const struct dsps_musb_wrapper *wrp = glue->wrp; + + /* musb->lock might already been held */ + epintr = (1 << epnum) << wrp->rxep_shift; + musb_writel(musb->ctrl_base, wrp->epintr_status, epintr); +} + static irqreturn_t dsps_interrupt(int irq, void *hci) { struct musb *musb = hci; @@ -622,6 +633,7 @@ static struct musb_platform_ops dsps_ops = { .set_mode = dsps_musb_set_mode, .recover = dsps_musb_recover, + .clear_ep_rxintr = dsps_musb_clear_ep_rxintr, }; static u64 musb_dmamask = DMA_BIT_MASK(32); -- GitLab From 72271ae49d6bf4011f5861fcae9e29dfbf87fb65 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 2 Jan 2017 15:26:17 +0100 Subject: [PATCH 0230/1442] usb: storage: unusual_uas: Add JMicron JMS56x to unusual device commit 674aea07e38200ea6f31ff6d5f200f0cf6cdb325 upstream. This device gives the following error on detection. xhci_hcd 0000:00:11.0: ERROR Transfer event for disabled endpoint or incorrect stream ring The same error is not seen when it is added to unusual_device list with US_FL_NO_REPORT_OPCODES passed. Signed-off-by: George Cherian Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index af3c7eecff91..16cc18369111 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2109,6 +2109,13 @@ UNUSUAL_DEV( 0x152d, 0x2566, 0x0114, 0x0114, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA ), +/* Reported-by George Cherian */ +UNUSUAL_DEV(0x152d, 0x9561, 0x0000, 0x9999, + "JMicron", + "JMS56x", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_OPCODES), + /* * Entrega Technologies U1-SC25 (later Xircom PortGear PGSCSI) * and Mac USB Dock USB-SCSI */ -- GitLab From 404954e5b8a675ea5b4ef4dc4c5a74b1ff3ad410 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 6 Dec 2016 08:36:29 +0100 Subject: [PATCH 0231/1442] usb: gadgetfs: restrict upper bound on device configuration size commit 0994b0a257557e18ee8f0b7c5f0f73fe2b54eec1 upstream. Andrey Konovalov reported that we were not properly checking the upper limit before of a device configuration size before calling memdup_user(), which could cause some problems. So set the upper limit to PAGE_SIZE * 4, which should be good enough for all devices. Reported-by: Andrey Konovalov Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index bd82dd12deff..298886b40361 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1762,7 +1762,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) } spin_unlock_irq(&dev->lock); - if (len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) + if ((len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) || + (len > PAGE_SIZE * 4)) return -EINVAL; /* we might need to change message format someday */ -- GitLab From b946777664dba9e3d5712073c173544c12762351 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 9 Dec 2016 15:17:46 -0500 Subject: [PATCH 0232/1442] USB: gadgetfs: fix unbounded memory allocation bug commit faab50984fe6636e616c7cc3d30308ba391d36fd upstream. Andrey Konovalov reports that fuzz testing with syzkaller causes a KASAN warning in gadgetfs: BUG: KASAN: slab-out-of-bounds in dev_config+0x86f/0x1190 at addr ffff88003c47e160 Write of size 65537 by task syz-executor0/6356 CPU: 3 PID: 6356 Comm: syz-executor0 Not tainted 4.9.0-rc7+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88003c107ad8 ffffffff81f96aba ffffffff3dc11ef0 1ffff10007820eee ffffed0007820ee6 ffff88003dc11f00 0000000041b58ab3 ffffffff8598b4c8 ffffffff81f96828 ffffffff813fb4a0 ffff88003b6eadc0 ffff88003c107738 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x292/0x398 lib/dump_stack.c:51 [] kasan_object_err+0x1c/0x70 mm/kasan/report.c:159 [< inline >] print_address_description mm/kasan/report.c:197 [] kasan_report_error+0x1f0/0x4e0 mm/kasan/report.c:286 [] kasan_report+0x35/0x40 mm/kasan/report.c:306 [< inline >] check_memory_region_inline mm/kasan/kasan.c:308 [] check_memory_region+0x139/0x190 mm/kasan/kasan.c:315 [] kasan_check_write+0x14/0x20 mm/kasan/kasan.c:326 [< inline >] copy_from_user arch/x86/include/asm/uaccess.h:689 [< inline >] ep0_write drivers/usb/gadget/legacy/inode.c:1135 [] dev_config+0x86f/0x1190 drivers/usb/gadget/legacy/inode.c:1759 [] __vfs_write+0x5d5/0x760 fs/read_write.c:510 [] vfs_write+0x170/0x4e0 fs/read_write.c:560 [< inline >] SYSC_write fs/read_write.c:607 [] SyS_write+0xfb/0x230 fs/read_write.c:599 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 Indeed, there is a comment saying that the value of len is restricted to a 16-bit integer, but the code doesn't actually do this. This patch fixes the warning. It replaces the comment with a computation that forces the amount of data copied from the user in ep0_write() to be no larger than the wLength size for the control transfer, which is a 16-bit quantity. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 298886b40361..e597534d23ac 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1126,7 +1126,7 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) /* data and/or status stage for control request */ } else if (dev->state == STATE_DEV_SETUP) { - /* IN DATA+STATUS caller makes len <= wLength */ + len = min_t(size_t, len, dev->setup_wLength); if (dev->setup_in) { retval = setup_req (dev->gadget->ep0, dev->req, len); if (retval == 0) { -- GitLab From 46427c247b6204f95d7406df9f43843306b0b7ab Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 9 Dec 2016 15:18:43 -0500 Subject: [PATCH 0233/1442] USB: gadgetfs: fix use-after-free bug commit add333a81a16abbd4f106266a2553677a165725f upstream. Andrey Konovalov reports that fuzz testing with syzkaller causes a KASAN use-after-free bug report in gadgetfs: BUG: KASAN: use-after-free in gadgetfs_setup+0x208a/0x20e0 at addr ffff88003dfe5bf2 Read of size 2 by task syz-executor0/22994 CPU: 3 PID: 22994 Comm: syz-executor0 Not tainted 4.9.0-rc7+ #16 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006df06a18 ffffffff81f96aba ffffffffe0528500 1ffff1000dbe0cd6 ffffed000dbe0cce ffff88006df068f0 0000000041b58ab3 ffffffff8598b4c8 ffffffff81f96828 1ffff1000dbe0ccd ffff88006df06708 ffff88006df06748 Call Trace: [ 201.343209] [< inline >] __dump_stack lib/dump_stack.c:15 [ 201.343209] [] dump_stack+0x292/0x398 lib/dump_stack.c:51 [] kasan_object_err+0x1c/0x70 mm/kasan/report.c:159 [< inline >] print_address_description mm/kasan/report.c:197 [] kasan_report_error+0x1f0/0x4e0 mm/kasan/report.c:286 [< inline >] kasan_report mm/kasan/report.c:306 [] __asan_report_load_n_noabort+0x3a/0x40 mm/kasan/report.c:337 [< inline >] config_buf drivers/usb/gadget/legacy/inode.c:1298 [] gadgetfs_setup+0x208a/0x20e0 drivers/usb/gadget/legacy/inode.c:1368 [] dummy_timer+0x11f0/0x36d0 drivers/usb/gadget/udc/dummy_hcd.c:1858 [] call_timer_fn+0x241/0x800 kernel/time/timer.c:1308 [< inline >] expire_timers kernel/time/timer.c:1348 [] __run_timers+0xa06/0xec0 kernel/time/timer.c:1641 [] run_timer_softirq+0x21/0x80 kernel/time/timer.c:1654 [] __do_softirq+0x2fb/0xb63 kernel/softirq.c:284 The cause of the bug is subtle. The dev_config() routine gets called twice by the fuzzer. The first time, the user data contains both a full-speed configuration descriptor and a high-speed config descriptor, causing dev->hs_config to be set. But it also contains an invalid device descriptor, so the buffer containing the descriptors is deallocated and dev_config() returns an error. The second time dev_config() is called, the user data contains only a full-speed config descriptor. But dev->hs_config still has the stale pointer remaining from the first call, causing the routine to think that there is a valid high-speed config. Later on, when the driver dereferences the stale pointer to copy that descriptor, we get a use-after-free access. The fix is simple: Clear dev->hs_config if the passed-in data does not contain a high-speed config descriptor. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index e597534d23ac..49c4de8b3165 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1800,6 +1800,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) goto fail; kbuf += total; length -= total; + } else { + dev->hs_config = NULL; } /* could support multiple configs, using another encoding! */ -- GitLab From da4543b3bce001ccc8a283ed64933dea13fb7b77 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 9 Dec 2016 15:24:24 -0500 Subject: [PATCH 0234/1442] USB: gadgetfs: fix checks of wTotalLength in config descriptors commit 1c069b057dcf64fada952eaa868d35f02bb0cfc2 upstream. Andrey Konovalov's fuzz testing of gadgetfs showed that we should improve the driver's checks for valid configuration descriptors passed in by the user. In particular, the driver needs to verify that the wTotalLength value in the descriptor is not too short (smaller than USB_DT_CONFIG_SIZE). And the check for whether wTotalLength is too large has to be changed, because the driver assumes there is always enough room remaining in the buffer to hold a device descriptor (at least USB_DT_DEVICE_SIZE bytes). This patch adds the additional check and fixes the existing check. It may do a little more than strictly necessary, but one extra check won't hurt. Signed-off-by: Alan Stern CC: Andrey Konovalov Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 49c4de8b3165..1468d8f085a3 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1734,10 +1734,12 @@ static struct usb_gadget_driver gadgetfs_driver = { * such as configuration notifications. */ -static int is_valid_config (struct usb_config_descriptor *config) +static int is_valid_config(struct usb_config_descriptor *config, + unsigned int total) { return config->bDescriptorType == USB_DT_CONFIG && config->bLength == USB_DT_CONFIG_SIZE + && total >= USB_DT_CONFIG_SIZE && config->bConfigurationValue != 0 && (config->bmAttributes & USB_CONFIG_ATT_ONE) != 0 && (config->bmAttributes & USB_CONFIG_ATT_WAKEUP) == 0; @@ -1787,7 +1789,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) /* full or low speed config */ dev->config = (void *) kbuf; total = le16_to_cpu(dev->config->wTotalLength); - if (!is_valid_config (dev->config) || total >= length) + if (!is_valid_config(dev->config, total) || + total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; @@ -1796,7 +1799,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) if (kbuf [1] == USB_DT_CONFIG) { dev->hs_config = (void *) kbuf; total = le16_to_cpu(dev->hs_config->wTotalLength); - if (!is_valid_config (dev->hs_config) || total >= length) + if (!is_valid_config(dev->hs_config, total) || + total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; -- GitLab From 05b0f2fc3c2f9efda47439557e0d51faca7e43ed Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 19 Dec 2016 12:03:41 -0500 Subject: [PATCH 0235/1442] USB: fix problems with duplicate endpoint addresses commit 0a8fd1346254974c3a852338508e4a4cddbb35f1 upstream. When checking a new device's descriptors, the USB core does not check for duplicate endpoint addresses. This can cause a problem when the sysfs files for those endpoints are created; trying to create multiple files with the same name will provoke a WARNING: WARNING: CPU: 2 PID: 865 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x8a/0xa0 sysfs: cannot create duplicate filename '/devices/platform/dummy_hcd.0/usb2/2-1/2-1:64.0/ep_05' Kernel panic - not syncing: panic_on_warn set ... CPU: 2 PID: 865 Comm: kworker/2:1 Not tainted 4.9.0-rc7+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event ffff88006bee64c8 ffffffff81f96b8a ffffffff00000001 1ffff1000d7dcc2c ffffed000d7dcc24 0000000000000001 0000000041b58ab3 ffffffff8598b510 ffffffff81f968f8 ffffffff850fee20 ffffffff85cff020 dffffc0000000000 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x292/0x398 lib/dump_stack.c:51 [] panic+0x1cb/0x3a9 kernel/panic.c:179 [] __warn+0x1c4/0x1e0 kernel/panic.c:542 [] warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:565 [] sysfs_warn_dup+0x8a/0xa0 fs/sysfs/dir.c:30 [] sysfs_create_dir_ns+0x178/0x1d0 fs/sysfs/dir.c:59 [< inline >] create_dir lib/kobject.c:71 [] kobject_add_internal+0x227/0xa60 lib/kobject.c:229 [< inline >] kobject_add_varg lib/kobject.c:366 [] kobject_add+0x139/0x220 lib/kobject.c:411 [] device_add+0x353/0x1660 drivers/base/core.c:1088 [] device_register+0x1d/0x20 drivers/base/core.c:1206 [] usb_create_ep_devs+0x163/0x260 drivers/usb/core/endpoint.c:195 [] create_intf_ep_devs+0x13b/0x200 drivers/usb/core/message.c:1030 [] usb_set_configuration+0x1083/0x18d0 drivers/usb/core/message.c:1937 [] generic_probe+0x6e/0xe0 drivers/usb/core/generic.c:172 [] usb_probe_device+0xaa/0xe0 drivers/usb/core/driver.c:263 This patch prevents the problem by checking for duplicate endpoint addresses during enumeration and skipping any duplicates. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index a2d90aca779f..1f7036c8f57b 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -234,6 +234,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, if (ifp->desc.bNumEndpoints >= num_ep) goto skip_to_next_endpoint_or_interface_descriptor; + /* Check for duplicate endpoint addresses */ + for (i = 0; i < ifp->desc.bNumEndpoints; ++i) { + if (ifp->endpoint[i].desc.bEndpointAddress == + d->bEndpointAddress) { + dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", + cfgno, inum, asnum, d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; + } + } + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; ++ifp->desc.bNumEndpoints; -- GitLab From 735daeec9e6062cad8aab412d0c04d26ae8e13e7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 14 Dec 2016 14:55:56 -0500 Subject: [PATCH 0236/1442] USB: dummy-hcd: fix bug in stop_activity (handle ep0) commit bcdbeb844773333d2d1c08004f3b3e25921040e5 upstream. The stop_activity() routine in dummy-hcd is supposed to unlink all active requests for every endpoint, among other things. But it doesn't handle ep0. As a result, fuzz testing can generate a WARNING like the following: WARNING: CPU: 0 PID: 4410 at drivers/usb/gadget/udc/dummy_hcd.c:672 dummy_free_request+0x153/0x170 Modules linked in: CPU: 0 PID: 4410 Comm: syz-executor Not tainted 4.9.0-rc7+ #32 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006a64ed10 ffffffff81f96b8a ffffffff41b58ab3 1ffff1000d4c9d35 ffffed000d4c9d2d ffff880065f8ac00 0000000041b58ab3 ffffffff8598b510 ffffffff81f968f8 0000000041b58ab3 ffffffff859410e0 ffffffff813f0590 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x292/0x398 lib/dump_stack.c:51 [] __warn+0x19f/0x1e0 kernel/panic.c:550 [] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 [] dummy_free_request+0x153/0x170 drivers/usb/gadget/udc/dummy_hcd.c:672 [] usb_ep_free_request+0xc0/0x420 drivers/usb/gadget/udc/core.c:195 [] gadgetfs_unbind+0x131/0x190 drivers/usb/gadget/legacy/inode.c:1612 [] usb_gadget_remove_driver+0x10f/0x2b0 drivers/usb/gadget/udc/core.c:1228 [] usb_gadget_unregister_driver+0x154/0x240 drivers/usb/gadget/udc/core.c:1357 This patch fixes the problem by iterating over all the endpoints in the driver's ep array instead of iterating over the gadget's ep_list, which explicitly leaves out ep0. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 77d07904f932..a81d9ab861dc 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -330,7 +330,7 @@ static void nuke(struct dummy *dum, struct dummy_ep *ep) /* caller must hold lock */ static void stop_activity(struct dummy *dum) { - struct dummy_ep *ep; + int i; /* prevent any more requests */ dum->address = 0; @@ -338,8 +338,8 @@ static void stop_activity(struct dummy *dum) /* The timer is left running so that outstanding URBs can fail */ /* nuke any pending requests first, so driver i/o is quiesced */ - list_for_each_entry(ep, &dum->gadget.ep_list, ep.ep_list) - nuke(dum, ep); + for (i = 0; i < DUMMY_ENDPOINTS; ++i) + nuke(dum, &dum->ep[i]); /* driver now does any non-usb quiescing necessary */ } -- GitLab From 2b95c939cb88c3182e9dd681d4cf40b70985b8a5 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Tue, 20 Dec 2016 19:52:16 +0100 Subject: [PATCH 0237/1442] usb: gadget: composite: Test get_alt() presence instead of set_alt() commit 7e4da3fcf7c9fe042f2f7cb7bf23861a899b4a8f upstream. By convention (according to doc) if function does not provide get_alt() callback composite framework should assume that it has only altsetting 0 and should respond with error if host tries to set other one. After commit dd4dff8b035f ("USB: composite: Fix bug: should test set_alt function pointer before use it") we started checking set_alt() callback instead of get_alt(). This check is useless as we check if set_alt() is set inside usb_add_function() and fail if it's NULL. Let's fix this check and move comment about why we check the get method instead of set a little bit closer to prevent future false fixes. Fixes: dd4dff8b035f ("USB: composite: Fix bug: should test set_alt function pointer before use it") Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index e38b21087d26..e1841fa96412 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1694,9 +1694,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) value = min(w_length, (u16) 1); break; - /* function drivers must handle get/set altsetting; if there's - * no get() method, we know only altsetting zero works. - */ + /* function drivers must handle get/set altsetting */ case USB_REQ_SET_INTERFACE: if (ctrl->bRequestType != USB_RECIP_INTERFACE) goto unknown; @@ -1705,7 +1703,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) f = cdev->config->interface[intf]; if (!f) break; - if (w_value && !f->set_alt) + + /* + * If there's no get_alt() method, we know only altsetting zero + * works. There is no need to check if set_alt() is not NULL + * as we check this in usb_add_function(). + */ + if (w_value && !f->get_alt) break; value = f->set_alt(f, w_index, w_value); if (value == USB_GADGET_DELAYED_STATUS) { -- GitLab From 9bdd47c53b7cf79f68ef5038e9494261f060f64e Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 23 Dec 2016 14:40:40 +0200 Subject: [PATCH 0238/1442] usb: dwc3: core: avoid Overflow events commit e71d363d9c611c99fb78f53bfee99616e7fe352c upstream. Now that we're handling so many transfers at a time and for some dwc3 revisions LPM events *must* be enabled, we can fall into a situation where too many events fire and we start receiving Overflow events. Let's do what XHCI does and allocate a full page for the Event Ring, this will avoid any future issues. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 6b60e42626a2..cc035e9eed86 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -43,9 +43,7 @@ #define DWC3_XHCI_RESOURCES_NUM 2 #define DWC3_SCRATCHBUF_SIZE 4096 /* each buffer is assumed to be 4KiB */ -#define DWC3_EVENT_SIZE 4 /* bytes */ -#define DWC3_EVENT_MAX_NUM 64 /* 2 events/endpoint */ -#define DWC3_EVENT_BUFFERS_SIZE (DWC3_EVENT_SIZE * DWC3_EVENT_MAX_NUM) +#define DWC3_EVENT_BUFFERS_SIZE 4096 #define DWC3_EVENT_TYPE_MASK 0xfe #define DWC3_EVENT_TYPE_DEV 0 -- GitLab From 9da8e3e48e8805810096f1fbd9e764c0baece561 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 3 Jan 2017 18:28:46 +0200 Subject: [PATCH 0239/1442] usb: xhci: fix possible wild pointer commit 2b985467371a58ae44d76c7ba12b0951fee6ed98 upstream. handle_cmd_completion() frees a command structure which might be still referenced by xhci->current_cmd. This might cause problem when xhci->current_cmd is accessed after that. A real-life case could be like this. The host takes a very long time to respond to a command, and the command timer is fired at the same time when the command completion event arrives. The command completion handler frees xhci->current_cmd before the timer function can grab xhci->lock. Afterward, timer function grabs the lock and go ahead with checking and setting members of xhci->current_cmd. Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 797137e26549..f7431f4df5a3 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1271,14 +1271,18 @@ void xhci_handle_command_timeout(unsigned long data) bool second_timeout = false; xhci = (struct xhci_hcd *) data; - /* mark this command to be cancelled */ spin_lock_irqsave(&xhci->lock, flags); - if (xhci->current_cmd) { - if (xhci->current_cmd->status == COMP_CMD_ABORT) - second_timeout = true; - xhci->current_cmd->status = COMP_CMD_ABORT; + + if (!xhci->current_cmd) { + spin_unlock_irqrestore(&xhci->lock, flags); + return; } + /* mark this command to be cancelled */ + if (xhci->current_cmd->status == COMP_CMD_ABORT) + second_timeout = true; + xhci->current_cmd->status = COMP_CMD_ABORT; + /* Make sure command ring is running before aborting it */ hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && @@ -1427,6 +1431,8 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, xhci->current_cmd = list_entry(cmd->cmd_list.next, struct xhci_command, cmd_list); mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); + } else if (xhci->current_cmd == cmd) { + xhci->current_cmd = NULL; } event_handled: -- GitLab From 40359f91569496e77494116dddf8dd317f29d9e5 Mon Sep 17 00:00:00 2001 From: Wan Ahmad Zainie Date: Tue, 3 Jan 2017 18:28:52 +0200 Subject: [PATCH 0240/1442] usb: xhci: apply XHCI_PME_STUCK_QUIRK to Intel Apollo Lake commit 6c97cfc1a097b1e0786c836e92b7a72b4d031e25 upstream. Intel Apollo Lake also requires XHCI_PME_STUCK_QUIRK. Adding its PCI ID to quirk. Signed-off-by: Wan Ahmad Zainie Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index e96ae80d107e..954abfd5014d 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -165,7 +165,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && -- GitLab From 3bf5e7410178eb0f95f2c51c984d67b77a8e61d3 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 3 Jan 2017 18:28:43 +0200 Subject: [PATCH 0241/1442] xhci: free xhci virtual devices with leaf nodes first commit ee8665e28e8d90ce69d4abe5a469c14a8707ae0e upstream. the tt_info provided by a HS hub might be in use to by a child device Make sure we free the devices in the correct order. This is needed in special cases such as when xhci controller is reset when resuming from hibernate, and all virt_devices are freed. Also free the virt_devices starting from max slot_id as children more commonly have higher slot_id than parent. Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 38 +++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 6afe32381209..b3a5cd86e86c 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -979,6 +979,40 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) xhci->devs[slot_id] = NULL; } +/* + * Free a virt_device structure. + * If the virt_device added a tt_info (a hub) and has children pointing to + * that tt_info, then free the child first. Recursive. + * We can't rely on udev at this point to find child-parent relationships. + */ +void xhci_free_virt_devices_depth_first(struct xhci_hcd *xhci, int slot_id) +{ + struct xhci_virt_device *vdev; + struct list_head *tt_list_head; + struct xhci_tt_bw_info *tt_info, *next; + int i; + + vdev = xhci->devs[slot_id]; + if (!vdev) + return; + + tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts); + list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) { + /* is this a hub device that added a tt_info to the tts list */ + if (tt_info->slot_id == slot_id) { + /* are any devices using this tt_info? */ + for (i = 1; i < HCS_MAX_SLOTS(xhci->hcs_params1); i++) { + vdev = xhci->devs[i]; + if (vdev && (vdev->tt_info == tt_info)) + xhci_free_virt_devices_depth_first( + xhci, i); + } + } + } + /* we are now at a leaf device */ + xhci_free_virt_device(xhci, slot_id); +} + int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, struct usb_device *udev, gfp_t flags) { @@ -1829,8 +1863,8 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) } } - for (i = 1; i < MAX_HC_SLOTS; ++i) - xhci_free_virt_device(xhci, i); + for (i = HCS_MAX_SLOTS(xhci->hcs_params1); i > 0; i--) + xhci_free_virt_devices_depth_first(xhci, i); dma_pool_destroy(xhci->segment_pool); xhci->segment_pool = NULL; -- GitLab From a2118d0974095b994a8aa7b32a6a213b961f4f3e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 3 Jan 2017 18:28:44 +0200 Subject: [PATCH 0242/1442] usb: xhci: fix return value of xhci_setup_device() commit 90797aee5d6902b49a453c97d83c326408aeb5a8 upstream. xhci_setup_device() should return failure with correct error number when xhci host has died, removed or halted. During usb device enumeration, if usb host is not accessible (died, removed or halted), the hc_driver->address_device() should return a corresponding error code to usb core. But current xhci driver just returns success. This misleads usb core to continue the enumeration by reading the device descriptor, which will result in failure, and users will get a misleading message like "device descriptor read/8, error -110". Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1a4ca02729c2..ad0624386950 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3783,8 +3783,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_lock(&xhci->mutex); - if (xhci->xhc_state) /* dying, removing or halted */ + if (xhci->xhc_state) { /* dying, removing or halted */ + ret = -ESHUTDOWN; goto out; + } if (!udev->slot_id) { xhci_dbg_trace(xhci, trace_xhci_dbg_address, -- GitLab From 78ccc1966c9e3dc379a89fb3efb06db35a297680 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 3 Jan 2017 18:28:47 +0200 Subject: [PATCH 0243/1442] usb: host: xhci: Fix possible wild pointer when handling abort command commit 2a7cfdf37b7c08ac29df4c62ea5ccb01474b6597 upstream. When current command was supposed to be aborted, host will free the command in handle_cmd_completion() function. But it might be still referenced by xhci->current_cmd, which need to set NULL. Signed-off-by: Baolin Wang Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index f7431f4df5a3..8a8c133280ab 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1368,8 +1368,11 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, */ if (cmd_comp_code == COMP_CMD_ABORT) { xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; - if (cmd->status == COMP_CMD_ABORT) + if (cmd->status == COMP_CMD_ABORT) { + if (xhci->current_cmd == cmd) + xhci->current_cmd = NULL; goto event_handled; + } } cmd_type = TRB_FIELD_TO_TYPE(le32_to_cpu(cmd_trb->generic.field[3])); -- GitLab From 9e6c400bb58220c46a2e5e616a1fbf756f011489 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 3 Jan 2017 18:28:48 +0200 Subject: [PATCH 0244/1442] xhci: Handle command completion and timeout race commit a5a1b9514154437aa1ed35c291191f82fd3e941a upstream. If we get a command completion event at the same time as the command timeout work starts on another cpu we might end up aborting the wrong command. If the command completion takes the xhci lock before the timeout work, it will handle the command, pick the next command, mark it as current_cmd, and re-queue the timeout work. When the timeout work finally gets the lock It will start aborting the wrong command. This case can be resolved by checking if the timeout work is pending inside the timeout function itself. A new timeout work can only be pending if the command completed and a new command was queued. If there are no more commands pending then command completion will set the current_cmd to NULL, which is already handled in the timeout work. Reported-by: Baolin Wang Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 8a8c133280ab..3c815592002e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1273,7 +1273,11 @@ void xhci_handle_command_timeout(unsigned long data) spin_lock_irqsave(&xhci->lock, flags); - if (!xhci->current_cmd) { + /* + * If timeout work is pending, or current_cmd is NULL, it means we + * raced with command completion. Command is handled so just return. + */ + if (!xhci->current_cmd || timer_pending(&xhci->cmd_timer)) { spin_unlock_irqrestore(&xhci->lock, flags); return; } -- GitLab From cb02cce9a7f8eea6f6c8432f296074d20829ff0f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 3 Jan 2017 18:28:49 +0200 Subject: [PATCH 0245/1442] usb: xhci: hold lock over xhci_abort_cmd_ring() commit 4dea70778c0f48b4385c7720c363ec8d37a401b4 upstream. In command timer function, xhci_handle_command_timeout(), xhci->lock is unlocked before call into xhci_abort_cmd_ring(). This might cause race between the timer function and the event handler. The xhci_abort_cmd_ring() function sets the CMD_RING_ABORT bit in the command register and polling it until the setting takes effect. A stop command ring event might be handled between writing the abort bit and polling for it. The event handler will restart the command ring, which causes the failure of polling, and we ever believed that we failed to stop it. As a bonus, this also fixes some issues of calling functions without locking in xhci_handle_command_timeout(). Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 3c815592002e..7fe375075687 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1291,29 +1291,34 @@ void xhci_handle_command_timeout(unsigned long data) hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && (hw_ring_state & CMD_RING_RUNNING)) { - spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "Command timeout\n"); ret = xhci_abort_cmd_ring(xhci); if (unlikely(ret == -ESHUTDOWN)) { xhci_err(xhci, "Abort command ring failed\n"); xhci_cleanup_command_queue(xhci); + spin_unlock_irqrestore(&xhci->lock, flags); usb_hc_died(xhci_to_hcd(xhci)->primary_hcd); xhci_dbg(xhci, "xHCI host controller is dead.\n"); + + return; } - return; + + goto time_out_completed; } /* command ring failed to restart, or host removed. Bail out */ if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) { - spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "command timed out twice, ring start fail?\n"); xhci_cleanup_command_queue(xhci); - return; + + goto time_out_completed; } /* command timeout on stopped ring, ring can't be aborted */ xhci_dbg(xhci, "Command timeout on stopped ring\n"); xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd); + +time_out_completed: spin_unlock_irqrestore(&xhci->lock, flags); return; } -- GitLab From 9acba5179d6c08f5499de52cb543d2b2fbb98cc2 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 3 Jan 2017 18:28:45 +0200 Subject: [PATCH 0246/1442] usb: return error code when platform_get_irq fails commit 28bedb5ae463b9f7e5195cbc93f1795e374bdef8 upstream. In function xhci_mtk_probe(), variable ret takes the return value. Its value should be negative on failures. However, when the call to function platform_get_irq() fails, it does not set the error code, and 0 will be returned. 0 indicates no error. As a result, the callers of function xhci_mtk_probe() will not be able to detect the error. This patch fixes the bug by assigning the return value of platform_get_irq() to variable ret if it fails. Signed-off-by: Pan Bian Reviewed-by: Matthias Brugger Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 79959f17c38c..f2365a47fa4a 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -560,8 +560,10 @@ static int xhci_mtk_probe(struct platform_device *pdev) goto disable_ldos; irq = platform_get_irq(pdev, 0); - if (irq < 0) + if (irq < 0) { + ret = irq; goto disable_clk; + } /* Initialize dma_mask and coherent_dma_mask to 32-bits */ ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); -- GitLab From 6ae3e89aa6b1cc3ae129922c7132ffea0aa6b19c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:58 +0100 Subject: [PATCH 0247/1442] USB: serial: omninet: fix NULL-derefs at open and disconnect commit a5bc01949e3b19d8a23b5eabc6fc71bb50dc820e upstream. Fix NULL-pointer dereferences at open() and disconnect() should the device lack the expected bulk-out endpoints: Unable to handle kernel NULL pointer dereference at virtual address 000000b4 ... [c0170ff0>] (__lock_acquire) from [] (lock_acquire+0x108/0x264) [] (lock_acquire) from [] (_raw_spin_lock_irqsave+0x58/0x6c) [] (_raw_spin_lock_irqsave) from [] (tty_port_tty_set+0x28/0xa4) [] (tty_port_tty_set) from [] (omninet_open+0x30/0x40 [omninet]) [] (omninet_open [omninet]) from [] (serial_port_activate+0x68/0x98 [usbserial]) Unable to handle kernel NULL pointer dereference at virtual address 00000234 ... [] (omninet_disconnect [omninet]) from [] (usb_serial_disconnect+0xe4/0x100 [usbserial]) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/omninet.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index f6c6900bccf0..a180b17d2432 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -38,6 +38,7 @@ static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int omninet_write_room(struct tty_struct *tty); static void omninet_disconnect(struct usb_serial *serial); +static int omninet_attach(struct usb_serial *serial); static int omninet_port_probe(struct usb_serial_port *port); static int omninet_port_remove(struct usb_serial_port *port); @@ -56,6 +57,7 @@ static struct usb_serial_driver zyxel_omninet_device = { .description = "ZyXEL - omni.net lcd plus usb", .id_table = id_table, .num_ports = 1, + .attach = omninet_attach, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, .open = omninet_open, @@ -104,6 +106,17 @@ struct omninet_data { __u8 od_outseq; /* Sequence number for bulk_out URBs */ }; +static int omninet_attach(struct usb_serial *serial) +{ + /* The second bulk-out endpoint is used for writing. */ + if (serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int omninet_port_probe(struct usb_serial_port *port) { struct omninet_data *od; -- GitLab From 2a4ae7bc3d8d8e4b92dbac37c90ba1708e7e5c80 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:40:01 +0100 Subject: [PATCH 0248/1442] USB: serial: quatech2: fix sleep-while-atomic in close commit f09d1886a41e9063b43da493ef0e845ac8afd2fa upstream. The write URB was being killed using the synchronous interface while holding a spin lock in close(). Simply drop the lock and busy-flag update, something which would have been taken care of by the completion handler if the URB was in flight. Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/quatech2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 85acb50a7ee2..bd1a1307e0f0 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -408,16 +408,12 @@ static void qt2_close(struct usb_serial_port *port) { struct usb_serial *serial; struct qt2_port_private *port_priv; - unsigned long flags; int i; serial = port->serial; port_priv = usb_get_serial_port_data(port); - spin_lock_irqsave(&port_priv->urb_lock, flags); usb_kill_urb(port_priv->write_urb); - port_priv->urb_in_use = false; - spin_unlock_irqrestore(&port_priv->urb_lock, flags); /* flush the port transmit buffer */ i = usb_control_msg(serial->dev, -- GitLab From 69c415ed5c98c03e7f12c5e2cd7d7e5f7708e4f4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:40:00 +0100 Subject: [PATCH 0249/1442] USB: serial: pl2303: fix NULL-deref at open commit 76ab439ed1b68778e9059c79ecc5d14de76c89a8 upstream. Fix NULL-pointer dereference in open() should a type-0 or type-1 device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at pl2303_open+0x38/0xec [pl2303] Note that a missing interrupt-in endpoint would have caused open() to fail. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index ae682e4eeaef..46fca6b75846 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -220,9 +220,17 @@ static int pl2303_probe(struct usb_serial *serial, static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; + unsigned char num_ports = serial->num_ports; enum pl2303_type type = TYPE_01; unsigned char *buf; + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); if (!spriv) return -ENOMEM; -- GitLab From dda7611ec4a5ced89cebb3a9c48afc9c846a1f83 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:48 +0100 Subject: [PATCH 0250/1442] USB: serial: keyspan_pda: verify endpoints at probe commit 5d9b0f859babe96175cd33d7162a9463a875ffde upstream. Check for the expected endpoints in attach() and fail loudly if not present. Note that failing to do this appears to be benign since da280e348866 ("USB: keyspan_pda: clean up write-urb busy handling") which prevents a NULL-pointer dereference in write() by never marking a non-existent write-urb as free. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/keyspan_pda.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index e49ad0c63ad8..83523fcf6fb9 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -699,6 +699,19 @@ MODULE_FIRMWARE("keyspan_pda/keyspan_pda.fw"); MODULE_FIRMWARE("keyspan_pda/xircom_pgs.fw"); #endif +static int keyspan_pda_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int keyspan_pda_port_probe(struct usb_serial_port *port) { @@ -776,6 +789,7 @@ static struct usb_serial_driver keyspan_pda_device = { .break_ctl = keyspan_pda_break_ctl, .tiocmget = keyspan_pda_tiocmget, .tiocmset = keyspan_pda_tiocmset, + .attach = keyspan_pda_attach, .port_probe = keyspan_pda_port_probe, .port_remove = keyspan_pda_port_remove, }; -- GitLab From b5264ea24484c8d34a0db7ed9085ab41cfc00576 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:40:02 +0100 Subject: [PATCH 0251/1442] USB: serial: spcp8x5: fix NULL-deref at open commit cc0909248258f679c4bb4cd315565d40abaf6bc6 upstream. Fix NULL-pointer dereference in open() should the device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at spcp8x5_open+0x30/0xd0 [spcp8x5] Fixes: 619a6f1d1423 ("USB: add usb-serial spcp8x5 driver") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/spcp8x5.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index ef0dbf0703c5..475e6c31b266 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -154,6 +154,19 @@ static int spcp8x5_probe(struct usb_serial *serial, return 0; } +static int spcp8x5_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int spcp8x5_port_probe(struct usb_serial_port *port) { const struct usb_device_id *id = usb_get_serial_data(port->serial); @@ -477,6 +490,7 @@ static struct usb_serial_driver spcp8x5_device = { .tiocmget = spcp8x5_tiocmget, .tiocmset = spcp8x5_tiocmset, .probe = spcp8x5_probe, + .attach = spcp8x5_attach, .port_probe = spcp8x5_port_probe, .port_remove = spcp8x5_port_remove, }; -- GitLab From 32631d1a739f919db4dea05c98543fa4fcfcb7c6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:43 +0100 Subject: [PATCH 0252/1442] USB: serial: io_ti: fix NULL-deref at open commit a323fefc6f5079844dc62ffeb54f491d0242ca35 upstream. Fix NULL-pointer dereference when clearing halt at open should a malicious device lack the expected endpoints when in download mode. Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... [] (edge_open [io_ti]) from [] (serial_port_activate+0x68/0x98 [usbserial]) [] (serial_port_activate [usbserial]) from [] (tty_port_open+0x9c/0xe8) [] (tty_port_open) from [] (serial_open+0x48/0x6c [usbserial]) [] (serial_open [usbserial]) from [] (tty_open+0xcc/0x5cc) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index fce82fd79f77..fd7d9b57ccd5 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2549,6 +2549,13 @@ static int edge_startup(struct usb_serial *serial) int status; u16 product_id; + /* Make sure we have the required endpoints when in download mode. */ + if (serial->interface->cur_altsetting->desc.bNumEndpoints > 1) { + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) + return -ENODEV; + } + /* create our private serial structure */ edge_serial = kzalloc(sizeof(struct edgeport_serial), GFP_KERNEL); if (!edge_serial) -- GitLab From a66274a9b2f4d570cdb4f44417bfc7efd97a8bab Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:44 +0100 Subject: [PATCH 0253/1442] USB: serial: io_ti: fix another NULL-deref at open commit 4f9785cc99feeb3673993b471f646b4dbaec2cc1 upstream. In case a device is left in "boot-mode" we must not register any port devices in order to avoid a NULL-pointer dereference on open due to missing endpoints. This could be used by a malicious device to trigger an OOPS: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... [] (edge_open [io_ti]) from [] (serial_port_activate+0x68/0x98 [usbserial]) [] (serial_port_activate [usbserial]) from [] (tty_port_open+0x9c/0xe8) [] (tty_port_open) from [] (serial_open+0x48/0x6c [usbserial]) [] (serial_open [usbserial]) from [] (tty_open+0xcc/0x5cc) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index fd7d9b57ccd5..07779c721242 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1508,7 +1508,7 @@ static int do_boot_mode(struct edgeport_serial *serial, dev_dbg(dev, "%s - STAYING IN BOOT MODE\n", __func__); serial->product_info.TiMode = TI_MODE_BOOT; - return 0; + return 1; } static int ti_do_config(struct edgeport_port *port, int feature, int on) @@ -2563,14 +2563,18 @@ static int edge_startup(struct usb_serial *serial) mutex_init(&edge_serial->es_lock); edge_serial->serial = serial; + INIT_DELAYED_WORK(&edge_serial->heartbeat_work, edge_heartbeat_work); usb_set_serial_data(serial, edge_serial); status = download_fw(edge_serial); - if (status) { + if (status < 0) { kfree(edge_serial); return status; } + if (status > 0) + return 1; /* bind but do not register any ports */ + product_id = le16_to_cpu( edge_serial->serial->dev->descriptor.idProduct); @@ -2582,7 +2586,6 @@ static int edge_startup(struct usb_serial *serial) } } - INIT_DELAYED_WORK(&edge_serial->heartbeat_work, edge_heartbeat_work); edge_heartbeat_schedule(edge_serial); return 0; -- GitLab From 69017618a61ea173bea773a4934a63030ac9998f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:45 +0100 Subject: [PATCH 0254/1442] USB: serial: io_ti: fix I/O after disconnect commit 2330d0a853da260d8a9834a70df448032b9ff623 upstream. Cancel the heartbeat work on driver unbind in order to avoid I/O after disconnect in case the port is held open. Note that the cancel in release() is still needed to stop the heartbeat after late probe errors. Fixes: 26c78daade0f ("USB: io_ti: Add heartbeat to keep idle EP/416 ports from disconnecting") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 07779c721242..8dab8e5e339e 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2593,6 +2593,9 @@ static int edge_startup(struct usb_serial *serial) static void edge_disconnect(struct usb_serial *serial) { + struct edgeport_serial *edge_serial = usb_get_serial_data(serial); + + cancel_delayed_work_sync(&edge_serial->heartbeat_work); } static void edge_release(struct usb_serial *serial) -- GitLab From 9401cc62b7f5acbb65d689e4da2b1b00c85861c3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:47 +0100 Subject: [PATCH 0255/1442] USB: serial: iuu_phoenix: fix NULL-deref at open commit 90507d54f712d81b74815ef3a4bbb555cd9fab2f upstream. Fix NULL-pointer dereference at open should the device lack a bulk-in or bulk-out endpoint: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at iuu_open+0x78/0x59c [iuu_phoenix] Fixes: 07c3b1a10016 ("USB: remove broken usb-serial num_endpoints check") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/iuu_phoenix.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 344b4eea4bd5..d57fb5199218 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -68,6 +68,16 @@ struct iuu_private { u32 clk; }; +static int iuu_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || serial->num_bulk_out < num_ports) + return -ENODEV; + + return 0; +} + static int iuu_port_probe(struct usb_serial_port *port) { struct iuu_private *priv; @@ -1196,6 +1206,7 @@ static struct usb_serial_driver iuu_device = { .tiocmset = iuu_tiocmset, .set_termios = iuu_set_termios, .init_termios = iuu_init_termios, + .attach = iuu_attach, .port_probe = iuu_port_probe, .port_remove = iuu_port_remove, }; -- GitLab From 0556702bf34e6c464b7ca0e14f66a87e8959aea0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:41 +0100 Subject: [PATCH 0256/1442] USB: serial: garmin_gps: fix memory leak on failed URB submit commit c4ac4496e835b78a45dfbf74f6173932217e4116 upstream. Make sure to free the URB transfer buffer in case submission fails (e.g. due to a disconnect). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/garmin_gps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 97cabf803c2f..b2f2e87aed94 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -1043,6 +1043,7 @@ static int garmin_write_bulk(struct usb_serial_port *port, "%s - usb_submit_urb(write bulk) failed with status = %d\n", __func__, status); count = status; + kfree(buffer); } /* we are done with this urb, so let the host driver -- GitLab From 2e5167b239e62bce68e839dc53fffa8d8d2dd7e4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:40:03 +0100 Subject: [PATCH 0257/1442] USB: serial: ti_usb_3410_5052: fix NULL-deref at open commit ef079936d3cd09e63612834fe2698eeada0d8e3f upstream. Fix NULL-pointer dereference in open() should a malicious device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 .. [] (ti_open [ti_usb_3410_5052]) from [] (serial_port_activate+0x68/0x98 [usbserial]) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index a8b9bdba314f..bdbddbc8bd4d 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -579,6 +579,13 @@ static int ti_startup(struct usb_serial *serial) goto free_tdev; } + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + status = -ENODEV; + goto free_tdev; + } + return 0; free_tdev: -- GitLab From 65914eeb39f47f44766b62a008e632b077b21100 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:42 +0100 Subject: [PATCH 0258/1442] USB: serial: io_edgeport: fix NULL-deref at open commit 0dd408425eb21ddf26a692b3c8044c9e7d1a7948 upstream. Fix NULL-pointer dereference when initialising URBs at open should a non-EPIC device lack a bulk-in or interrupt-in endpoint. Unable to handle kernel NULL pointer dereference at virtual address 00000028 ... PC is at edge_open+0x24c/0x3e8 [io_edgeport] Note that the EPIC-device probe path has the required sanity checks so this makes those checks partially redundant. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 11c05ce2f35f..36dfe9972b17 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -2754,6 +2754,11 @@ static int edge_startup(struct usb_serial *serial) EDGE_COMPATIBILITY_MASK1, EDGE_COMPATIBILITY_MASK2 }; + if (serial->num_bulk_in < 1 || serial->num_interrupt_in < 1) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + dev = serial->dev; /* create our private serial structure */ -- GitLab From 4de811c61ac562295adae03c06f581c167efd307 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:59 +0100 Subject: [PATCH 0259/1442] USB: serial: oti6858: fix NULL-deref at open commit 5afeef2366db14587b65558bbfd5a067542e07fb upstream. Fix NULL-pointer dereference in open() should the device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at oti6858_open+0x30/0x1d0 [oti6858] Note that a missing interrupt-in endpoint would have caused open() to fail. Fixes: 49cdee0ed0fc ("USB: oti6858 usb-serial driver (in Nokia CA-42 cable)") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/oti6858.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index a4b88bc038b6..b8bf52bf7a94 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -134,6 +134,7 @@ static int oti6858_chars_in_buffer(struct tty_struct *tty); static int oti6858_tiocmget(struct tty_struct *tty); static int oti6858_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); +static int oti6858_attach(struct usb_serial *serial); static int oti6858_port_probe(struct usb_serial_port *port); static int oti6858_port_remove(struct usb_serial_port *port); @@ -158,6 +159,7 @@ static struct usb_serial_driver oti6858_device = { .write_bulk_callback = oti6858_write_bulk_callback, .write_room = oti6858_write_room, .chars_in_buffer = oti6858_chars_in_buffer, + .attach = oti6858_attach, .port_probe = oti6858_port_probe, .port_remove = oti6858_port_remove, }; @@ -324,6 +326,20 @@ static void send_data(struct work_struct *work) usb_serial_port_softint(port); } +static int oti6858_attach(struct usb_serial *serial) +{ + unsigned char num_ports = serial->num_ports; + + if (serial->num_bulk_in < num_ports || + serial->num_bulk_out < num_ports || + serial->num_interrupt_in < num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int oti6858_port_probe(struct usb_serial_port *port) { struct oti6858_private *priv; -- GitLab From b2aa55142ea5cff7217f2633e85602d2279ff92e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:40 +0100 Subject: [PATCH 0260/1442] USB: serial: cyberjack: fix NULL-deref at open commit 3dca01114dcecb1cf324534cd8d75fd1306a516b upstream. Fix NULL-pointer dereference when clearing halt at open should the device lack a bulk-out endpoint. Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at cyberjack_open+0x40/0x9c [cyberjack] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cyberjack.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index 5f17a3b9916d..80260b08398b 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -50,6 +50,7 @@ #define CYBERJACK_PRODUCT_ID 0x0100 /* Function prototypes */ +static int cyberjack_attach(struct usb_serial *serial); static int cyberjack_port_probe(struct usb_serial_port *port); static int cyberjack_port_remove(struct usb_serial_port *port); static int cyberjack_open(struct tty_struct *tty, @@ -77,6 +78,7 @@ static struct usb_serial_driver cyberjack_device = { .description = "Reiner SCT Cyberjack USB card reader", .id_table = id_table, .num_ports = 1, + .attach = cyberjack_attach, .port_probe = cyberjack_port_probe, .port_remove = cyberjack_port_remove, .open = cyberjack_open, @@ -100,6 +102,14 @@ struct cyberjack_private { short wrsent; /* Data already sent */ }; +static int cyberjack_attach(struct usb_serial *serial) +{ + if (serial->num_bulk_out < serial->num_ports) + return -ENODEV; + + return 0; +} + static int cyberjack_port_probe(struct usb_serial_port *port) { struct cyberjack_private *priv; -- GitLab From 9de856caff86c18f092a03a16f06f1422796be94 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:49 +0100 Subject: [PATCH 0261/1442] USB: serial: kobil_sct: fix NULL-deref in write commit 21ce57840243c7b70fbc1ebd3dceeb70bb6e9e09 upstream. Fix NULL-pointer dereference in write() should the device lack the expected interrupt-out endpoint: Unable to handle kernel NULL pointer dereference at virtual address 00000054 ... PC is at kobil_write+0x144/0x2a0 [kobil_sct] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/kobil_sct.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 2363654cafc9..813035f51fe7 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -51,6 +51,7 @@ /* Function prototypes */ +static int kobil_attach(struct usb_serial *serial); static int kobil_port_probe(struct usb_serial_port *probe); static int kobil_port_remove(struct usb_serial_port *probe); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port); @@ -86,6 +87,7 @@ static struct usb_serial_driver kobil_device = { .description = "KOBIL USB smart card terminal", .id_table = id_table, .num_ports = 1, + .attach = kobil_attach, .port_probe = kobil_port_probe, .port_remove = kobil_port_remove, .ioctl = kobil_ioctl, @@ -113,6 +115,16 @@ struct kobil_private { }; +static int kobil_attach(struct usb_serial *serial) +{ + if (serial->num_interrupt_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing interrupt-out endpoint\n"); + return -ENODEV; + } + + return 0; +} + static int kobil_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; -- GitLab From bd52ddb0996af221523f7572840c6875bc0d4b8b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:55 +0100 Subject: [PATCH 0262/1442] USB: serial: mos7840: fix NULL-deref at open commit 5c75633ef751dd4cd8f443dc35152c1ae563162e upstream. Fix NULL-pointer dereference in open() should the device lack the expected endpoints: Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... PC is at mos7840_open+0x88/0x8dc [mos7840] Note that we continue to treat the interrupt-in endpoint as optional for now. Fixes: 3f5429746d91 ("USB: Moschip 7840 USB-Serial Driver") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 57426d703a09..4f9af47e6a29 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -2116,6 +2116,17 @@ static int mos7840_calc_num_ports(struct usb_serial *serial) return mos7840_num_ports; } +static int mos7840_attach(struct usb_serial *serial) +{ + if (serial->num_bulk_in < serial->num_ports || + serial->num_bulk_out < serial->num_ports) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + return 0; +} + static int mos7840_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; @@ -2391,6 +2402,7 @@ static struct usb_serial_driver moschip7840_4port_device = { .tiocmset = mos7840_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, + .attach = mos7840_attach, .port_probe = mos7840_port_probe, .port_remove = mos7840_port_remove, .read_bulk_callback = mos7840_bulk_in_callback, -- GitLab From ac81f1fa956f217b5a111631b81e7d7c9b6ebd73 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:50 +0100 Subject: [PATCH 0263/1442] USB: serial: mos7720: fix NULL-deref at open commit b05aebc25fdc5aeeac3ee29f0dc9f58dd07c13cc upstream. Fix NULL-pointer dereference at port open if a device lacks the expected bulk in and out endpoints. Unable to handle kernel NULL pointer dereference at virtual address 00000030 ... [] (mos7720_open [mos7720]) from [] (serial_port_activate+0x68/0x98 [usbserial]) [] (serial_port_activate [usbserial]) from [] (tty_port_open+0x9c/0xe8) [] (tty_port_open) from [] (serial_open+0x48/0x6c [usbserial]) [] (serial_open [usbserial]) from [] (tty_open+0xcc/0x5cc) Fixes: 0f64478cbc7a ("USB: add USB serial mos7720 driver") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index de9992b492b0..8d085eb95ff9 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1920,6 +1920,11 @@ static int mos7720_startup(struct usb_serial *serial) u16 product; int ret_val; + if (serial->num_bulk_in < 2 || serial->num_bulk_out < 2) { + dev_err(&serial->interface->dev, "missing bulk endpoints\n"); + return -ENODEV; + } + product = le16_to_cpu(serial->dev->descriptor.idProduct); dev = serial->dev; -- GitLab From e7cf756c89326f8dcee8a109d092d73acc59d72e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:51 +0100 Subject: [PATCH 0264/1442] USB: serial: mos7720: fix use-after-free on probe errors commit 91a1ff4d53c5184d383d0baeeaeab6f9736f2ff3 upstream. The interrupt URB was submitted on probe but never stopped on probe errors. This can lead to use-after-free issues in the completion handler when accessing the freed usb-serial struct: Unable to handle kernel paging request at virtual address 6b6b6be7 ... [] (mos7715_interrupt_callback [mos7720]) from [] (__usb_hcd_giveback_urb+0x80/0x140) [] (__usb_hcd_giveback_urb) from [] (usb_hcd_giveback_urb+0x50/0x138) [] (usb_hcd_giveback_urb) from [] (musb_giveback+0xc8/0x1cc) Fixes: b69578df7e98 ("USB: usbserial: mos7720: add support for parallel port on moschip 7715") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 8d085eb95ff9..d9d2b5e6a6f3 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1965,8 +1965,10 @@ static int mos7720_startup(struct usb_serial *serial) #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT if (product == MOSCHIP_DEVICE_ID_7715) { ret_val = mos7715_parport_init(serial); - if (ret_val < 0) + if (ret_val < 0) { + usb_kill_urb(serial->port[0]->interrupt_in_urb); return ret_val; + } } #endif /* LSR For Port 1 */ @@ -1978,6 +1980,8 @@ static int mos7720_startup(struct usb_serial *serial) static void mos7720_release(struct usb_serial *serial) { + usb_kill_urb(serial->port[0]->interrupt_in_urb); + #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT /* close the parallel port */ -- GitLab From 6ea44fb2183c6e4c4c7888e35498717f9d637c39 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:52 +0100 Subject: [PATCH 0265/1442] USB: serial: mos7720: fix parport use-after-free on probe errors commit 75dd211e773afcbc264677b0749d1cf7d937ab2d upstream. Do not submit the interrupt URB until after the parport has been successfully registered to avoid another use-after-free in the completion handler when accessing the freed parport private data in case of a racing completion. Fixes: b69578df7e98 ("USB: usbserial: mos7720: add support for parallel port on moschip 7715") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index d9d2b5e6a6f3..935a2dec3a9b 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1955,22 +1955,20 @@ static int mos7720_startup(struct usb_serial *serial) usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), (__u8)0x03, 0x00, 0x01, 0x00, NULL, 0x00, 5000); - /* start the interrupt urb */ - ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); - if (ret_val) - dev_err(&dev->dev, - "%s - Error %d submitting control urb\n", - __func__, ret_val); - #ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT if (product == MOSCHIP_DEVICE_ID_7715) { ret_val = mos7715_parport_init(serial); - if (ret_val < 0) { - usb_kill_urb(serial->port[0]->interrupt_in_urb); + if (ret_val < 0) return ret_val; - } } #endif + /* start the interrupt urb */ + ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); + if (ret_val) { + dev_err(&dev->dev, "failed to submit interrupt urb: %d\n", + ret_val); + } + /* LSR For Port 1 */ read_mos_reg(serial, 0, MOS7720_LSR, &data); dev_dbg(&dev->dev, "LSR:%x\n", data); -- GitLab From 1bd67e85edf1826afbc592028bce941e0425f2dc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:53 +0100 Subject: [PATCH 0266/1442] USB: serial: mos7720: fix parallel probe commit fde1faf872ed86d88e245191bc15a8e57368cd1c upstream. A static usb-serial-driver structure that is used to initialise the interrupt URB was modified during probe depending on the currently probed device type, something which could break a parallel probe of a device of a different type. Fix this up by overriding the default completion callback for MCS7715 devices in attach() instead. We may want to use two usb-serial driver instances for the two types later. Fixes: fb088e335d78 ("USB: serial: add support for serial port on the moschip 7715") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7720.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 935a2dec3a9b..136ff5e1b7c1 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -65,8 +65,6 @@ struct moschip_port { struct urb *write_urb_pool[NUM_URBS]; }; -static struct usb_serial_driver moschip7720_2port_driver; - #define USB_VENDOR_ID_MOSCHIP 0x9710 #define MOSCHIP_DEVICE_ID_7720 0x7720 #define MOSCHIP_DEVICE_ID_7715 0x7715 @@ -970,25 +968,6 @@ static void mos7720_bulk_out_data_callback(struct urb *urb) tty_port_tty_wakeup(&mos7720_port->port->port); } -/* - * mos77xx_probe - * this function installs the appropriate read interrupt endpoint callback - * depending on whether the device is a 7720 or 7715, thus avoiding costly - * run-time checks in the high-frequency callback routine itself. - */ -static int mos77xx_probe(struct usb_serial *serial, - const struct usb_device_id *id) -{ - if (id->idProduct == MOSCHIP_DEVICE_ID_7715) - moschip7720_2port_driver.read_int_callback = - mos7715_interrupt_callback; - else - moschip7720_2port_driver.read_int_callback = - mos7720_interrupt_callback; - - return 0; -} - static int mos77xx_calc_num_ports(struct usb_serial *serial) { u16 product = le16_to_cpu(serial->dev->descriptor.idProduct); @@ -1949,6 +1928,12 @@ static int mos7720_startup(struct usb_serial *serial) tmp->interrupt_in_endpointAddress; serial->port[1]->interrupt_in_urb = NULL; serial->port[1]->interrupt_in_buffer = NULL; + + if (serial->port[0]->interrupt_in_urb) { + struct urb *urb = serial->port[0]->interrupt_in_urb; + + urb->complete = mos7715_interrupt_callback; + } } /* setting configuration feature to one */ @@ -2063,7 +2048,6 @@ static struct usb_serial_driver moschip7720_2port_driver = { .close = mos7720_close, .throttle = mos7720_throttle, .unthrottle = mos7720_unthrottle, - .probe = mos77xx_probe, .attach = mos7720_startup, .release = mos7720_release, .port_probe = mos7720_port_probe, @@ -2077,7 +2061,7 @@ static struct usb_serial_driver moschip7720_2port_driver = { .chars_in_buffer = mos7720_chars_in_buffer, .break_ctl = mos7720_break, .read_bulk_callback = mos7720_bulk_in_callback, - .read_int_callback = NULL /* dynamically assigned in probe() */ + .read_int_callback = mos7720_interrupt_callback, }; static struct usb_serial_driver * const serial_drivers[] = { -- GitLab From 6db52153fea3c4cca32712106202955d034e7fad Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 10 Nov 2016 22:33:17 +0300 Subject: [PATCH 0267/1442] usb: xhci-mem: use passed in GFP flags instead of GFP_KERNEL commit c95a9f83711bf53faeb4ed9bbb63a3f065613dfb upstream. We normally use the passed in gfp flags for allocations, it's just these two which were missed. Fixes: 22d45f01a836 ("usb/xhci: replace pci_*_consistent() with dma_*_coherent()") Cc: Mathias Nyman Signed-off-by: Dan Carpenter Acked-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index b3a5cd86e86c..495ffc01fa89 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2418,7 +2418,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) * "physically contiguous and 64-byte (cache line) aligned". */ xhci->dcbaa = dma_alloc_coherent(dev, sizeof(*xhci->dcbaa), &dma, - GFP_KERNEL); + flags); if (!xhci->dcbaa) goto fail; memset(xhci->dcbaa, 0, sizeof *(xhci->dcbaa)); @@ -2514,7 +2514,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) xhci->erst.entries = dma_alloc_coherent(dev, sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS, &dma, - GFP_KERNEL); + flags); if (!xhci->erst.entries) goto fail; xhci_dbg_trace(xhci, trace_xhci_dbg_init, -- GitLab From 799dfdeb33a0f8d08c4a6d6781b20a4011825ee4 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 3 Jan 2017 18:28:50 +0200 Subject: [PATCH 0268/1442] xhci: Use delayed_work instead of timer for command timeout commit cb4d5ce588c5ff68e0fdd30370a0e6bc2c0a736b upstream. This is preparation to fix abort operation race (See "xhci: Fix race related to abort operation"). To make timeout sleepable, use delayed_work instead of timer. [change a newly added pending timer fix to pending work -Mathias] Signed-off-by: OGAWA Hirofumi Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 7 +++---- drivers/usb/host/xhci-ring.c | 26 ++++++++++++++++---------- drivers/usb/host/xhci.h | 4 ++-- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 495ffc01fa89..baba11b6cb1e 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1830,7 +1830,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) int size; int i, j, num_ports; - del_timer_sync(&xhci->cmd_timer); + cancel_delayed_work_sync(&xhci->cmd_timer); /* Free the Event Ring Segment Table and the actual Event Ring */ size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries); @@ -2377,9 +2377,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) INIT_LIST_HEAD(&xhci->cmd_list); - /* init command timeout timer */ - setup_timer(&xhci->cmd_timer, xhci_handle_command_timeout, - (unsigned long)xhci); + /* init command timeout work */ + INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout); page_size = readl(&xhci->op_regs->page_size); xhci_dbg_trace(xhci, trace_xhci_dbg_init, diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 7fe375075687..cd07dae430db 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -260,6 +260,11 @@ void xhci_ring_cmd_db(struct xhci_hcd *xhci) readl(&xhci->dba->doorbell[0]); } +static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay) +{ + return mod_delayed_work(system_wq, &xhci->cmd_timer, delay); +} + static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) { u64 temp_64; @@ -276,7 +281,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) * but the completion event in never sent. Use the cmd timeout timer to * handle those cases. Use twice the time to cover the bit polling retry */ - mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT)); + xhci_mod_cmd_timer(xhci, 2 * XHCI_CMD_DEFAULT_TIMEOUT); xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); @@ -301,7 +306,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) xhci_err(xhci, "Stopped the command ring failed, " "maybe the host is dead\n"); - del_timer(&xhci->cmd_timer); + cancel_delayed_work(&xhci->cmd_timer); xhci->xhc_state |= XHCI_STATE_DYING; xhci_quiesce(xhci); xhci_halt(xhci); @@ -1255,21 +1260,22 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && !(xhci->xhc_state & XHCI_STATE_DYING)) { xhci->current_cmd = cur_cmd; - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); xhci_ring_cmd_db(xhci); } return; } -void xhci_handle_command_timeout(unsigned long data) +void xhci_handle_command_timeout(struct work_struct *work) { struct xhci_hcd *xhci; int ret; unsigned long flags; u64 hw_ring_state; bool second_timeout = false; - xhci = (struct xhci_hcd *) data; + + xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer); spin_lock_irqsave(&xhci->lock, flags); @@ -1277,7 +1283,7 @@ void xhci_handle_command_timeout(unsigned long data) * If timeout work is pending, or current_cmd is NULL, it means we * raced with command completion. Command is handled so just return. */ - if (!xhci->current_cmd || timer_pending(&xhci->cmd_timer)) { + if (!xhci->current_cmd || delayed_work_pending(&xhci->cmd_timer)) { spin_unlock_irqrestore(&xhci->lock, flags); return; } @@ -1351,7 +1357,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list); - del_timer(&xhci->cmd_timer); + cancel_delayed_work(&xhci->cmd_timer); trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event); @@ -1442,7 +1448,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, if (cmd->cmd_list.next != &xhci->cmd_list) { xhci->current_cmd = list_entry(cmd->cmd_list.next, struct xhci_command, cmd_list); - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); } else if (xhci->current_cmd == cmd) { xhci->current_cmd = NULL; } @@ -3938,9 +3944,9 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, /* if there are no other commands queued we start the timeout timer */ if (xhci->cmd_list.next == &cmd->cmd_list && - !timer_pending(&xhci->cmd_timer)) { + !delayed_work_pending(&xhci->cmd_timer)) { xhci->current_cmd = cmd; - mod_timer(&xhci->cmd_timer, jiffies + XHCI_CMD_DEFAULT_TIMEOUT); + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); } queue_trb(xhci, xhci->cmd_ring, false, field1, field2, field3, diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index f945380035d0..da5bfd6ced12 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1571,7 +1571,7 @@ struct xhci_hcd { #define CMD_RING_STATE_STOPPED (1 << 2) struct list_head cmd_list; unsigned int cmd_ring_reserved_trbs; - struct timer_list cmd_timer; + struct delayed_work cmd_timer; struct xhci_command *current_cmd; struct xhci_ring *event_ring; struct xhci_erst erst; @@ -1941,7 +1941,7 @@ void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci, unsigned int slot_id, unsigned int ep_index, struct xhci_dequeue_state *deq_state); void xhci_stop_endpoint_command_watchdog(unsigned long arg); -void xhci_handle_command_timeout(unsigned long data); +void xhci_handle_command_timeout(struct work_struct *work); void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, unsigned int slot_id, unsigned int ep_index, unsigned int stream_id); -- GitLab From 63d92d10a8208afeed87522a4b4b09e309c8622a Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 3 Jan 2017 18:28:51 +0200 Subject: [PATCH 0269/1442] xhci: Fix race related to abort operation commit 1c111b6c3844a142e03bcfc2fa17bfbdea08e9dc upstream. Current abort operation has race. xhci_handle_command_timeout() xhci_abort_cmd_ring() xhci_write_64(CMD_RING_ABORT) xhci_handshake(5s) do { check CMD_RING_RUNNING udelay(1) ... COMP_CMD_ABORT event COMP_CMD_STOP event xhci_handle_stopped_cmd_ring() restart cmd_ring CMD_RING_RUNNING become 1 again } while () return -ETIMEDOUT xhci_write_64(CMD_RING_ABORT) /* can abort random command */ To do abort operation correctly, we have to wait both of COMP_CMD_STOP event and negation of CMD_RING_RUNNING. But like above, while timeout handler is waiting negation of CMD_RING_RUNNING, event handler can restart cmd_ring. So timeout handler never be notice negation of CMD_RING_RUNNING, and retry of CMD_RING_ABORT can abort random command (BTW, I guess retry of CMD_RING_ABORT was workaround of this race). To fix this race, this moves xhci_handle_stopped_cmd_ring() to xhci_abort_cmd_ring(). And timeout handler waits COMP_CMD_STOP event. At this point, timeout handler is owner of cmd_ring, and safely restart cmd_ring by using xhci_handle_stopped_cmd_ring(). [FWIW, as bonus, this way would be easily extend to add CMD_RING_PAUSE operation] [locks edited as patch is rebased on other locking fixes -Mathias] Signed-off-by: OGAWA Hirofumi Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 1 + drivers/usb/host/xhci-ring.c | 169 ++++++++++++++++++----------------- drivers/usb/host/xhci.h | 1 + 3 files changed, 91 insertions(+), 80 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index baba11b6cb1e..7064892ff4a6 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2379,6 +2379,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) /* init command timeout work */ INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout); + init_completion(&xhci->cmd_ring_stop_completion); page_size = readl(&xhci->op_regs->page_size); xhci_dbg_trace(xhci, trace_xhci_dbg_init, diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index cd07dae430db..771a6da9caea 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -265,23 +265,71 @@ static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay) return mod_delayed_work(system_wq, &xhci->cmd_timer, delay); } -static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) +static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci) +{ + return list_first_entry_or_null(&xhci->cmd_list, struct xhci_command, + cmd_list); +} + +/* + * Turn all commands on command ring with status set to "aborted" to no-op trbs. + * If there are other commands waiting then restart the ring and kick the timer. + * This must be called with command ring stopped and xhci->lock held. + */ +static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, + struct xhci_command *cur_cmd) +{ + struct xhci_command *i_cmd; + u32 cycle_state; + + /* Turn all aborted commands in list to no-ops, then restart */ + list_for_each_entry(i_cmd, &xhci->cmd_list, cmd_list) { + + if (i_cmd->status != COMP_CMD_ABORT) + continue; + + i_cmd->status = COMP_CMD_STOP; + + xhci_dbg(xhci, "Turn aborted command %p to no-op\n", + i_cmd->command_trb); + /* get cycle state from the original cmd trb */ + cycle_state = le32_to_cpu( + i_cmd->command_trb->generic.field[3]) & TRB_CYCLE; + /* modify the command trb to no-op command */ + i_cmd->command_trb->generic.field[0] = 0; + i_cmd->command_trb->generic.field[1] = 0; + i_cmd->command_trb->generic.field[2] = 0; + i_cmd->command_trb->generic.field[3] = cpu_to_le32( + TRB_TYPE(TRB_CMD_NOOP) | cycle_state); + + /* + * caller waiting for completion is called when command + * completion event is received for these no-op commands + */ + } + + xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; + + /* ring command ring doorbell to restart the command ring */ + if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && + !(xhci->xhc_state & XHCI_STATE_DYING)) { + xhci->current_cmd = cur_cmd; + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + xhci_ring_cmd_db(xhci); + } +} + +/* Must be called with xhci->lock held, releases and aquires lock back */ +static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) { u64 temp_64; int ret; xhci_dbg(xhci, "Abort command ring\n"); - temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); - xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; + reinit_completion(&xhci->cmd_ring_stop_completion); - /* - * Writing the CMD_RING_ABORT bit should cause a cmd completion event, - * however on some host hw the CMD_RING_RUNNING bit is correctly cleared - * but the completion event in never sent. Use the cmd timeout timer to - * handle those cases. Use twice the time to cover the bit polling retry - */ - xhci_mod_cmd_timer(xhci, 2 * XHCI_CMD_DEFAULT_TIMEOUT); + temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); @@ -301,16 +349,30 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci) udelay(1000); ret = xhci_handshake(&xhci->op_regs->cmd_ring, CMD_RING_RUNNING, 0, 3 * 1000 * 1000); - if (ret == 0) - return 0; - - xhci_err(xhci, "Stopped the command ring failed, " - "maybe the host is dead\n"); - cancel_delayed_work(&xhci->cmd_timer); - xhci->xhc_state |= XHCI_STATE_DYING; - xhci_quiesce(xhci); - xhci_halt(xhci); - return -ESHUTDOWN; + if (ret < 0) { + xhci_err(xhci, "Stopped the command ring failed, " + "maybe the host is dead\n"); + xhci->xhc_state |= XHCI_STATE_DYING; + xhci_quiesce(xhci); + xhci_halt(xhci); + return -ESHUTDOWN; + } + } + /* + * Writing the CMD_RING_ABORT bit should cause a cmd completion event, + * however on some host hw the CMD_RING_RUNNING bit is correctly cleared + * but the completion event in never sent. Wait 2 secs (arbitrary + * number) to handle those cases after negation of CMD_RING_RUNNING. + */ + spin_unlock_irqrestore(&xhci->lock, flags); + ret = wait_for_completion_timeout(&xhci->cmd_ring_stop_completion, + msecs_to_jiffies(2000)); + spin_lock_irqsave(&xhci->lock, flags); + if (!ret) { + xhci_dbg(xhci, "No stop event for abort, ring start fail?\n"); + xhci_cleanup_command_queue(xhci); + } else { + xhci_handle_stopped_cmd_ring(xhci, xhci_next_queued_cmd(xhci)); } return 0; @@ -1216,64 +1278,12 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci) xhci_complete_del_and_free_cmd(cur_cmd, COMP_CMD_ABORT); } -/* - * Turn all commands on command ring with status set to "aborted" to no-op trbs. - * If there are other commands waiting then restart the ring and kick the timer. - * This must be called with command ring stopped and xhci->lock held. - */ -static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, - struct xhci_command *cur_cmd) -{ - struct xhci_command *i_cmd, *tmp_cmd; - u32 cycle_state; - - /* Turn all aborted commands in list to no-ops, then restart */ - list_for_each_entry_safe(i_cmd, tmp_cmd, &xhci->cmd_list, - cmd_list) { - - if (i_cmd->status != COMP_CMD_ABORT) - continue; - - i_cmd->status = COMP_CMD_STOP; - - xhci_dbg(xhci, "Turn aborted command %p to no-op\n", - i_cmd->command_trb); - /* get cycle state from the original cmd trb */ - cycle_state = le32_to_cpu( - i_cmd->command_trb->generic.field[3]) & TRB_CYCLE; - /* modify the command trb to no-op command */ - i_cmd->command_trb->generic.field[0] = 0; - i_cmd->command_trb->generic.field[1] = 0; - i_cmd->command_trb->generic.field[2] = 0; - i_cmd->command_trb->generic.field[3] = cpu_to_le32( - TRB_TYPE(TRB_CMD_NOOP) | cycle_state); - - /* - * caller waiting for completion is called when command - * completion event is received for these no-op commands - */ - } - - xhci->cmd_ring_state = CMD_RING_STATE_RUNNING; - - /* ring command ring doorbell to restart the command ring */ - if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && - !(xhci->xhc_state & XHCI_STATE_DYING)) { - xhci->current_cmd = cur_cmd; - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); - xhci_ring_cmd_db(xhci); - } - return; -} - - void xhci_handle_command_timeout(struct work_struct *work) { struct xhci_hcd *xhci; int ret; unsigned long flags; u64 hw_ring_state; - bool second_timeout = false; xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer); @@ -1287,18 +1297,17 @@ void xhci_handle_command_timeout(struct work_struct *work) spin_unlock_irqrestore(&xhci->lock, flags); return; } - /* mark this command to be cancelled */ - if (xhci->current_cmd->status == COMP_CMD_ABORT) - second_timeout = true; xhci->current_cmd->status = COMP_CMD_ABORT; /* Make sure command ring is running before aborting it */ hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && (hw_ring_state & CMD_RING_RUNNING)) { + /* Prevent new doorbell, and start command abort */ + xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; xhci_dbg(xhci, "Command timeout\n"); - ret = xhci_abort_cmd_ring(xhci); + ret = xhci_abort_cmd_ring(xhci, flags); if (unlikely(ret == -ESHUTDOWN)) { xhci_err(xhci, "Abort command ring failed\n"); xhci_cleanup_command_queue(xhci); @@ -1312,9 +1321,9 @@ void xhci_handle_command_timeout(struct work_struct *work) goto time_out_completed; } - /* command ring failed to restart, or host removed. Bail out */ - if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) { - xhci_dbg(xhci, "command timed out twice, ring start fail?\n"); + /* host removed. Bail out */ + if (xhci->xhc_state & XHCI_STATE_REMOVING) { + xhci_dbg(xhci, "host removed, ring start fail?\n"); xhci_cleanup_command_queue(xhci); goto time_out_completed; @@ -1365,7 +1374,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, /* If CMD ring stopped we own the trbs between enqueue and dequeue */ if (cmd_comp_code == COMP_CMD_STOP) { - xhci_handle_stopped_cmd_ring(xhci, cmd); + complete_all(&xhci->cmd_ring_stop_completion); return; } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index da5bfd6ced12..c525722aa934 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1572,6 +1572,7 @@ struct xhci_hcd { struct list_head cmd_list; unsigned int cmd_ring_reserved_trbs; struct delayed_work cmd_timer; + struct completion cmd_ring_stop_completion; struct xhci_command *current_cmd; struct xhci_ring *event_ring; struct xhci_erst erst; -- GitLab From 6af3ba285acb67b092f6f40033f6ab848eba1a15 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 1 Apr 2016 17:13:12 +0300 Subject: [PATCH 0270/1442] usb: dwc3: pci: add Intel Gemini Lake PCI ID commit 8f8983a5683623b62b339d159573f95a1fce44f3 upstream. Intel Gemini Lake SoC has the same DWC3 than Broxton. Add the new ID to the supported Devices. Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 6df0f5dad9a4..788853b5b215 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -38,6 +38,7 @@ #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa #define PCI_DEVICE_ID_INTEL_APL 0x5aaa #define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 +#define PCI_DEVICE_ID_INTEL_GLK 0x31aa static const struct acpi_gpio_params reset_gpios = { 0, 0, false }; static const struct acpi_gpio_params cs_gpios = { 1, 0, false }; @@ -229,6 +230,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; -- GitLab From 81f1f24d1873f56c6810fcc1ca5d2ae965c01dfc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 27 Dec 2016 13:13:42 +0200 Subject: [PATCH 0271/1442] usb: dwc3: pci: Fix dr_mode misspelling commit 51c1685d956221576e165dd88a20063b169bae5a upstream. usb_get_dr_mode() expects the device-property to be spelled "dr_mode" not "dr-mode". Spelling it properly fixes the following warning showing up in dmesg: [ 8704.500545] dwc3 dwc3.2.auto: Configuration mismatch. dr_mode forced to gadget Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 788853b5b215..427291a19e6d 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -82,7 +82,7 @@ static int dwc3_pci_quirks(struct pci_dev *pdev, struct platform_device *dwc3) int ret; struct property_entry properties[] = { - PROPERTY_ENTRY_STRING("dr-mode", "peripheral"), + PROPERTY_ENTRY_STRING("dr_mode", "peripheral"), { } }; -- GitLab From 5e3c2920e9f2f21d132df92318c5df2751da0b1f Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 3 Jan 2017 14:32:09 +0200 Subject: [PATCH 0272/1442] usb: dwc3: gadget: Fix full speed mode commit 9418ee15f718939aa7e650fd586d73765eb21f20 upstream. DCFG.DEVSPD == 0x3 is not valid and we need to set DCFG.DEVSPD to 0x1 for full speed mode. Same goes for DSTS.CONNECTSPD. Old databooks had 0x3 for full speed in 48MHz mode for USB1.1 transceivers which was never supported. Newer databooks don't mention 0x3 at all. Cc: John Youn Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 6 ++---- drivers/usb/dwc3/gadget.c | 5 ++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index cc035e9eed86..884c43714456 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -301,9 +301,8 @@ #define DWC3_DCFG_SUPERSPEED_PLUS (5 << 0) /* DWC_usb31 only */ #define DWC3_DCFG_SUPERSPEED (4 << 0) #define DWC3_DCFG_HIGHSPEED (0 << 0) -#define DWC3_DCFG_FULLSPEED2 (1 << 0) +#define DWC3_DCFG_FULLSPEED (1 << 0) #define DWC3_DCFG_LOWSPEED (2 << 0) -#define DWC3_DCFG_FULLSPEED1 (3 << 0) #define DWC3_DCFG_NUMP_SHIFT 17 #define DWC3_DCFG_NUMP(n) (((n) >> DWC3_DCFG_NUMP_SHIFT) & 0x1f) @@ -395,9 +394,8 @@ #define DWC3_DSTS_SUPERSPEED_PLUS (5 << 0) /* DWC_usb31 only */ #define DWC3_DSTS_SUPERSPEED (4 << 0) #define DWC3_DSTS_HIGHSPEED (0 << 0) -#define DWC3_DSTS_FULLSPEED2 (1 << 0) +#define DWC3_DSTS_FULLSPEED (1 << 0) #define DWC3_DSTS_LOWSPEED (2 << 0) -#define DWC3_DSTS_FULLSPEED1 (3 << 0) /* Device Generic Command Register */ #define DWC3_DGCMD_SET_LMP 0x01 diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b3687e223e00..7eff0412e3bb 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1606,7 +1606,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) reg |= DWC3_DCFG_LOWSPEED; break; case USB_SPEED_FULL: - reg |= DWC3_DCFG_FULLSPEED1; + reg |= DWC3_DCFG_FULLSPEED; break; case USB_SPEED_HIGH: reg |= DWC3_DCFG_HIGHSPEED; @@ -2465,8 +2465,7 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) dwc->gadget.ep0->maxpacket = 64; dwc->gadget.speed = USB_SPEED_HIGH; break; - case DWC3_DSTS_FULLSPEED2: - case DWC3_DSTS_FULLSPEED1: + case DWC3_DSTS_FULLSPEED: dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(64); dwc->gadget.ep0->maxpacket = 64; dwc->gadget.speed = USB_SPEED_FULL; -- GitLab From d7550d7d6ae2c769a32a48d7a02c351dc61efe3f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 3 Jan 2017 18:13:48 -0600 Subject: [PATCH 0273/1442] usb: musb: Fix trying to free already-free IRQ 4 commit 8c300fe282fa254ea730c92cb0983e2642dc1fff upstream. When unloading omap2430, we can get the following splat: WARNING: CPU: 1 PID: 295 at kernel/irq/manage.c:1478 __free_irq+0xa8/0x2c8 Trying to free already-free IRQ 4 ... [] (free_irq) from [] (musbhs_dma_controller_destroy+0x28/0xb0 [musb_hdrc]) [] (musbhs_dma_controller_destroy [musb_hdrc]) from [] (musb_remove+0xf0/0x12c [musb_hdrc]) [] (musb_remove [musb_hdrc]) from [] (platform_drv_remove+0x24/0x3c) ... This is because the irq number in use is 260 nowadays, and the dma controller is using u8 instead of int. Fixes: 6995eb68aab7 ("USB: musb: enable low level DMA operation for Blackfin") Signed-off-by: Tony Lindgren [b-liu@ti.com: added Fixes tag] Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musbhsdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musbhsdma.h b/drivers/usb/musb/musbhsdma.h index f7b13fd25257..a3dcbd55e436 100644 --- a/drivers/usb/musb/musbhsdma.h +++ b/drivers/usb/musb/musbhsdma.h @@ -157,5 +157,5 @@ struct musb_dma_controller { void __iomem *base; u8 channel_count; u8 used_channels; - u8 irq; + int irq; }; -- GitLab From 8ecf70fb0889754e27bbc2852fa331734dc4d61a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 14 Dec 2016 15:37:30 +0100 Subject: [PATCH 0274/1442] usb: hub: Move hub_port_disable() to fix warning if PM is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3bc02bce908c7250781376052248f5cd60a4e3d4 upstream. If CONFIG_PM=n: drivers/usb/core/hub.c:107: warning: ‘hub_usb3_port_prepare_disable’ declared inline after being called drivers/usb/core/hub.c:107: warning: previous declaration of ‘hub_usb3_port_prepare_disable’ was here To fix this, move hub_port_disable() after hub_usb3_port_prepare_disable(), and adjust forward declarations. Fixes: 37be66767e3cae4f ("usb: hub: Fix auto-remount of safely removed or ejected USB-3 devices") Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 59 +++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 0d81436c94bd..aef81a16e2c8 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -101,8 +101,7 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); -static void hub_usb3_port_prepare_disable(struct usb_hub *hub, - struct usb_port *port_dev); +static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static inline char *portspeed(struct usb_hub *hub, int portstatus) { @@ -900,34 +899,6 @@ static int hub_set_port_link_state(struct usb_hub *hub, int port1, USB_PORT_FEAT_LINK_STATE); } -/* - * USB-3 does not have a similar link state as USB-2 that will avoid negotiating - * a connection with a plugged-in cable but will signal the host when the cable - * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices - */ -static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) -{ - struct usb_port *port_dev = hub->ports[port1 - 1]; - struct usb_device *hdev = hub->hdev; - int ret = 0; - - if (!hub->error) { - if (hub_is_superspeed(hub->hdev)) { - hub_usb3_port_prepare_disable(hub, port_dev); - ret = hub_set_port_link_state(hub, port_dev->portnum, - USB_SS_PORT_LS_U3); - } else { - ret = usb_clear_port_feature(hdev, port1, - USB_PORT_FEAT_ENABLE); - } - } - if (port_dev->child && set_state) - usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); - if (ret && ret != -ENODEV) - dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); - return ret; -} - /* * Disable a port and mark a logical connect-change event, so that some * time later hub_wq will disconnect() any existing usb_device on the port @@ -4153,6 +4124,34 @@ static int hub_handle_remote_wakeup(struct usb_hub *hub, unsigned int port, #endif /* CONFIG_PM */ +/* + * USB-3 does not have a similar link state as USB-2 that will avoid negotiating + * a connection with a plugged-in cable but will signal the host when the cable + * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices + */ +static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) +{ + struct usb_port *port_dev = hub->ports[port1 - 1]; + struct usb_device *hdev = hub->hdev; + int ret = 0; + + if (!hub->error) { + if (hub_is_superspeed(hub->hdev)) { + hub_usb3_port_prepare_disable(hub, port_dev); + ret = hub_set_port_link_state(hub, port_dev->portnum, + USB_SS_PORT_LS_U3); + } else { + ret = usb_clear_port_feature(hdev, port1, + USB_PORT_FEAT_ENABLE); + } + } + if (port_dev->child && set_state) + usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); + if (ret && ret != -ENODEV) + dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); + return ret; +} + /* USB 2.0 spec, 7.1.7.3 / fig 7-29: * -- GitLab From 64244edf304bf06389e2b95ebd17fcc6e86e553c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20H=C3=A4dicke?= Date: Thu, 29 Dec 2016 23:02:11 +0100 Subject: [PATCH 0275/1442] usb: gadget: udc: core: fix return code of usb_gadget_probe_driver() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7b01738112608ce47083178ae2b9ebadf02d32cc upstream. This fixes a regression which was introduced by commit f1bddbb, by reverting a small fragment of commit 855ed04. If the following conditions were met, usb_gadget_probe_driver() returned 0, although the call was unsuccessful: 1. A particular UDC was specified by thge gadget driver (using member "udc_name" of struct usb_gadget_driver). 2. The UDC with this name is available. 3. Another gadget driver is already bound to this gadget. 4. The gadget driver has the "match_existing_only" flag set. In this case, the return code variable "ret" is set to 0, the return code of a strcmp() call (to check for the second condition). This also fixes an oops which could occur in the following scenario: 1. Two usb gadget instances were configured using configfs. 2. The first gadget configuration was bound to a UDC (using the configfs attribute "UDC"). 3. It was tried to bind the second gadget configuration to the same UDC in the same way. This operation was then wrongly reported as being successful. 4. The second gadget configuration's "UDC" attribute is cleared, to unbind the (not really bound) second gadget configuration from the UDC. ] __list_del_entry+0x29/0xc0 PGD 41b4c5067 PUD 41a598067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: cdc_acm usb_f_fs usb_f_serial usb_f_acm u_serial libcomposite configfs dummy_hcd bnep intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm snd_hda_codec_hdmi irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd snd_hda_codec_realtek snd_hda_codec_generic serio_raw uvcvideo videobuf2_vmalloc btusb snd_usb_audio snd_hda_intel videobuf2_memops btrtl snd_hda_codec snd_hda_core snd_usbmidi_lib btbcm videobuf2_v4l2 btintel snd_hwdep videobuf2_core snd_seq_midi bluetooth snd_seq_midi_event videodev xpad efi_pstore snd_pcm_oss rfkill joydev media crc16 ff_memless snd_mixer_oss snd_rawmidi nls_ascii snd_pcm snd_seq snd_seq_device nls_cp437 mei_me snd_timer vfat sg udc_core lpc_ich fat efivars mfd_core mei snd soundcore battery nuvoton_cir rc_core evdev intel_smartconnect ie31200_edac edac_core shpchp tpm_tis tpm_tis_core tpm parport_pc ppdev lp parport efivarfs autofs4 btrfs xor raid6_pq hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid uas usb_storage sr_mod cdrom sd_mod ahci libahci nouveau i915 crc32c_intel i2c_algo_bit psmouse ttm xhci_pci libata scsi_mod ehci_pci drm_kms_helper xhci_hcd ehci_hcd r8169 mii usbcore drm nvme nvme_core fjes button [last unloaded: net2280] CPU: 5 PID: 829 Comm: bash Not tainted 4.9.0-rc7 #1 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./Z77 Extreme3, BIOS P1.50 07/11/2013 task: ffff880419ce4040 task.stack: ffffc90002ed4000 RIP: 0010:[] [] __list_del_entry+0x29/0xc0 RSP: 0018:ffffc90002ed7d68 EFLAGS: 00010207 RAX: 0000000000000000 RBX: ffff88041787ec30 RCX: dead000000000200 RDX: 0000000000000000 RSI: ffff880417482002 RDI: ffff88041787ec30 RBP: ffffc90002ed7d68 R08: 0000000000000000 R09: 0000000000000010 R10: 0000000000000000 R11: ffff880419ce4040 R12: ffff88041787eb68 R13: ffff88041787eaa8 R14: ffff88041560a2c0 R15: 0000000000000001 FS: 00007fe4e49b8700(0000) GS:ffff88042f340000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000041b4c4000 CR4: 00000000001406e0 Stack: ffffc90002ed7d80 ffffffff94f5e68d ffffffffc0ae5ef0 ffffc90002ed7da0 ffffffffc0ae22aa ffff88041787e800 ffff88041787e800 ffffc90002ed7dc0 ffffffffc0d7a727 ffffffff952273fa ffff88041aba5760 ffffc90002ed7df8 Call Trace: [] list_del+0xd/0x30 [] usb_gadget_unregister_driver+0xaa/0xc0 [udc_core] [] unregister_gadget+0x27/0x60 [libcomposite] [] ? mutex_lock+0x1a/0x30 [] gadget_dev_desc_UDC_store+0x88/0xe0 [libcomposite] [] configfs_write_file+0xa0/0x100 [configfs] [] __vfs_write+0x37/0x160 [] ? __fd_install+0x30/0xd0 [] ? _raw_spin_unlock+0xe/0x10 [] vfs_write+0xb8/0x1b0 [] SyS_write+0x58/0xc0 [] ? __close_fd+0x94/0xc0 [] entry_SYSCALL_64_fastpath+0x1e/0xad Code: 66 90 55 48 8b 07 48 b9 00 01 00 00 00 00 ad de 48 8b 57 08 48 89 e5 48 39 c8 74 29 48 b9 00 02 00 00 00 00 ad de 48 39 ca 74 3a <4c> 8b 02 4c 39 c7 75 52 4c 8b 40 08 4c 39 c7 75 66 48 89 50 08 RIP [] __list_del_entry+0x29/0xc0 RSP CR2: 0000000000000000 ---[ end trace 99fc090ab3ff6cbc ]--- Fixes: f1bddbb ("usb: gadget: Fix binding to UDC via configfs interface") Signed-off-by: Felix Hädicke Tested-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 9483489080f6..0402177f93cd 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1317,7 +1317,11 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver) if (!ret) break; } - if (!ret && !udc->driver) + if (ret) + ret = -ENODEV; + else if (udc->driver) + ret = -EBUSY; + else goto found; } else { list_for_each_entry(udc, &udc_list, list) { -- GitLab From 2eb09ccfa45b4367a7f416936cf757b95063d015 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Tue, 3 Jan 2017 18:13:52 -0600 Subject: [PATCH 0276/1442] usb: musb: blackfin: add bfin_fifo_offset in bfin_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5563bb5743cb09bde0d0f4660a5e5b19c26903bf upstream. The function bfin_fifo_offset is defined but not used: drivers/usb/musb/blackfin.c:36:12: warning: ‘bfin_fifo_offset’ defined but not used [-Wunused-function] static u32 bfin_fifo_offset(u8 epnum) ^~~~~~~~~~~~~~~~ Adding bfin_fifo_offset to bfin_ops fixes this warning and allows musb core to call this function instead of default_fifo_offset. Fixes: cc92f6818f6e ("usb: musb: Populate new IO functions for blackfin") Signed-off-by: Jérémy Lefaure Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/blackfin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index 310238c6b5cd..896798071817 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -469,6 +469,7 @@ static const struct musb_platform_ops bfin_ops = { .init = bfin_musb_init, .exit = bfin_musb_exit, + .fifo_offset = bfin_fifo_offset, .readb = bfin_readb, .writeb = bfin_writeb, .readw = bfin_readw, -- GitLab From e7c72dccd7250c8379fef1cc2551c3c55009a4e4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Nov 2016 22:28:40 +0100 Subject: [PATCH 0277/1442] ALSA: usb-audio: Fix bogus error return in snd_usb_create_stream() commit 4763601a56f155ddf94ef35fc2c41504a2de15f5 upstream. The function returns -EINVAL even if it builds the stream properly. The bogus error code sneaked in during the code refactoring, but it wasn't noticed until now since the returned error code itself is ignored in anyway. Kill it here, but there is no behavior change by this patch, obviously. Fixes: e5779998bf8b ('ALSA: usb-audio: refactor code') Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/card.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 2ddc034673a8..f36cb068dad3 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -206,7 +206,6 @@ static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int int if (! snd_usb_parse_audio_interface(chip, interface)) { usb_set_interface(dev, interface, 0); /* reset the current interface */ usb_driver_claim_interface(&usb_audio_driver, iface, (void *)-1L); - return -EINVAL; } return 0; -- GitLab From 8da83724d4918b35700fcbce4bfeac39fa2138f5 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 29 Nov 2016 16:55:02 +0100 Subject: [PATCH 0278/1442] USB: serial: kl5kusb105: abort on open exception path commit 3c3dd1e058cb01e835dcade4b54a6f13ffaeaf7c upstream. Function klsi_105_open() calls usb_control_msg() (to "enable read") and checks its return value. When the return value is unexpected, it only assigns the error code to the return variable retval, but does not terminate the exception path. This patch fixes the bug by inserting "goto err_generic_close;" when the call to usb_control_msg() fails. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Pan Bian [johan: rebase on prerequisite fix and amend commit message] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/kl5kusb105.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 6f29bfadbe33..0ee190fc1bf8 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -311,6 +311,7 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) if (rc < 0) { dev_err(&port->dev, "Enabling read failed (error = %d)\n", rc); retval = rc; + goto err_generic_close; } else dev_dbg(&port->dev, "%s - enabled reading\n", __func__); @@ -337,6 +338,7 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) 0, /* index */ NULL, 0, KLSI_TIMEOUT); +err_generic_close: usb_serial_generic_close(port); err_free_cfg: kfree(cfg); -- GitLab From b429e37b80fd6738ce3d1d6450cc5b4f40ac12a4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Nov 2016 15:42:41 -0800 Subject: [PATCH 0279/1442] usb: gadget: Fix second argument of percpu_ida_alloc() commit 03274445c01562d5352ea522431ab8c6175e2bbf upstream. Pass a task state as second argument to percpu_ida_alloc(). Fixes: commit 71e7ae8e1fb2 ("usb-gadget/tcm: Conversion to percpu_ida tag pre-allocation") Signed-off-by: Bart Van Assche Cc: Nicholas Bellinger Cc: Andrzej Pietrasiewicz Cc: Sebastian Andrzej Siewior Cc: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_tcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index 197f73386fac..d2351139342f 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -1073,7 +1073,7 @@ static struct usbg_cmd *usbg_get_cmd(struct f_uas *fu, struct usbg_cmd *cmd; int tag; - tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING); if (tag < 0) return ERR_PTR(-ENOMEM); -- GitLab From a10a1b797a0fc6b352c69826c5e9cf09e2056250 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 8 Nov 2016 10:08:24 +0800 Subject: [PATCH 0280/1442] usb: gadget: fix request length error for isoc transfer commit 982555fc26f9d8bcdbd5f9db0378fe0682eb4188 upstream. For isoc endpoint descriptor, the wMaxPacketSize is not real max packet size (see Table 9-13. Standard Endpoint Descriptor, USB 2.0 specifcation), it may contain the number of packet, so the real max packet should be ep->desc->wMaxPacketSize && 0x7ff. Cc: Felipe F. Tonello Cc: Felipe Balbi Fixes: 16b114a6d797 ("usb: gadget: fix usb_ep_align_maybe endianness and new usb_ep_aligna") Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 8e81f9eb95e4..e4516e9ded0f 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -429,7 +429,9 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev) */ static inline size_t usb_ep_align(struct usb_ep *ep, size_t len) { - return round_up(len, (size_t)le16_to_cpu(ep->desc->wMaxPacketSize)); + int max_packet_size = (size_t)usb_endpoint_maxp(ep->desc) & 0x7ff; + + return round_up(len, max_packet_size); } /** -- GitLab From d387f98cb0c1e70ad45daacfb96acec4a5e0cb60 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 7 Nov 2016 20:07:07 +0100 Subject: [PATCH 0281/1442] ARM: dts: r8a7794: Correct hsusb parent clock commit dc8ee9dbdba509fb58e23ba79f2e6059fe5d8b3b upstream. The parent clock of the HSUSB clock is the HP clock, not the MP clock. Fixes: c7bab9f929e51761 ("ARM: shmobile: r8a7794: Add USB clocks to device tree") Signed-off-by: Geert Uytterhoeven Acked-by: Yoshihiro Shimoda Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/r8a7794.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi index 9365580a194f..725ecb3c5fb4 100644 --- a/arch/arm/boot/dts/r8a7794.dtsi +++ b/arch/arm/boot/dts/r8a7794.dtsi @@ -1260,7 +1260,7 @@ mstp7_clks: mstp7_clks@e615014c { compatible = "renesas,r8a7794-mstp-clocks", "renesas,cpg-mstp-clocks"; reg = <0 0xe615014c 0 4>, <0 0xe61501c4 0 4>; - clocks = <&mp_clk>, <&mp_clk>, + clocks = <&mp_clk>, <&hp_clk>, <&zs_clk>, <&p_clk>, <&p_clk>, <&zs_clk>, <&zs_clk>, <&p_clk>, <&p_clk>, <&p_clk>, <&p_clk>, <&zx_clk>; -- GitLab From 93f6891a383f92f88793da1e46cb018bdef8c18a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 11:40:25 +0100 Subject: [PATCH 0282/1442] USB: phy: am335x-control: fix device and of_node leaks commit 015105b12183556771e111e93f5266851e7c5582 upstream. Make sure to drop the references taken by of_parse_phandle() and bus_find_device() before returning from am335x_get_phy_control(). Note that there is no guarantee that the devres-managed struct phy_control will be valid for the lifetime of the sibling phy device regardless of this change. Fixes: 3bb869c8b3f1 ("usb: phy: Add AM335x PHY driver") Acked-by: Bin Liu Signed-off-by: Johan Hovold Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-am335x-control.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/phy/phy-am335x-control.c b/drivers/usb/phy/phy-am335x-control.c index 42a1afe36a90..5f5f19813fde 100644 --- a/drivers/usb/phy/phy-am335x-control.c +++ b/drivers/usb/phy/phy-am335x-control.c @@ -134,10 +134,12 @@ struct phy_control *am335x_get_phy_control(struct device *dev) return NULL; dev = bus_find_device(&platform_bus_type, NULL, node, match); + of_node_put(node); if (!dev) return NULL; ctrl_usb = dev_get_drvdata(dev); + put_device(dev); if (!ctrl_usb) return NULL; return &ctrl_usb->phy_ctrl; -- GitLab From 5ef54ae37b130ba8c01b90e451d059c9fa2dbfa3 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sat, 24 Sep 2016 17:14:21 +0800 Subject: [PATCH 0283/1442] arm64: dts: hip06: Correct hardware pin number of usb node commit 4d75a171b67ffc3f4dadbd654c9d281091300eb2 upstream. The ohci/ehci hardware pin number should be 640/641, correct them. Fixes: commit aa8d3e74f54d ("arm64: dts: Add initial dts for Hisilicon Hip06 D03 board") Signed-off-by: Kefeng Wang Signed-off-by: Wei Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/hisilicon/hip06.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/hisilicon/hip06.dtsi b/arch/arm64/boot/dts/hisilicon/hip06.dtsi index b548763366dd..af450413b9dd 100644 --- a/arch/arm64/boot/dts/hisilicon/hip06.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hip06.dtsi @@ -322,7 +322,7 @@ compatible = "generic-ohci"; reg = <0x0 0xa7030000 0x0 0x10000>; interrupt-parent = <&mbigen_usb>; - interrupts = <64 4>; + interrupts = <640 4>; dma-coherent; status = "disabled"; }; @@ -331,7 +331,7 @@ compatible = "generic-ehci"; reg = <0x0 0xa7020000 0x0 0x10000>; interrupt-parent = <&mbigen_usb>; - interrupts = <65 4>; + interrupts = <641 4>; dma-coherent; status = "disabled"; }; -- GitLab From 5f6136debf7eefa8e585c2fa52b72403f324fe77 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 15 Nov 2016 21:51:04 +0800 Subject: [PATCH 0284/1442] ARM: dts: sun7i: bananapi-m1-plus: Enable USB PHY for USB host support commit 0cff18cbab4f55581d9da86e4286655d9723d7d2 upstream. The 2 USB host ports are directly tied to the 2 USB hosts in the SoC. The 2 host pairs were already enabled, but the USB PHY wasn't. VBUS on the 2 ports are always on. Enable the USB PHY. Fixes: 04c85ecad32a ("ARM: dts: sun7i: Add dts file for Bananapi M1 Plus board") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts index ba5bca0fe997..44377a98cc89 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts @@ -227,3 +227,8 @@ pinctrl-0 = <&uart0_pins_a>; status = "okay"; }; + +&usbphy { + /* VBUS on usb host ports are tied to DC5V and therefore always on */ + status = "okay"; +}; -- GitLab From fb0a00fb0843136baee7812b7319c2b4db2592aa Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 29 Oct 2016 23:36:24 -0200 Subject: [PATCH 0285/1442] dibusb: fix possible memory leak in dibusb_rc_query() commit 1f5ecaf985c46889278f51fcb7bc143f60f4eb14 upstream. 'buf' is malloced in dibusb_rc_query() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. Fixes: ff1c123545d7 ("[media] dibusb: handle error code on RC query") Signed-off-by: Wei Yongjun Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/dibusb-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c index de3ee2547479..8207e6900656 100644 --- a/drivers/media/usb/dvb-usb/dibusb-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-common.c @@ -382,9 +382,9 @@ int dibusb_rc_query(struct dvb_usb_device *d, u32 *event, int *state) if (buf[0] != 0) deb_info("key: %*ph\n", 5, buf); +ret: kfree(buf); -ret: return ret; } EXPORT_SYMBOL(dibusb_rc_query); -- GitLab From c1ec6ba3d7c71a28d9a14f4abb1999fd5db80e2f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Jan 2017 16:39:46 +0100 Subject: [PATCH 0286/1442] USB: serial: io_ti: bind to interface after fw download commit e35d6d7c4e6532a89732cf4bace0e910ee684c88 upstream. Bind to the interface, but do not register any ports, after having downloaded the firmware. The device will still disconnect and re-enumerate, but this way we avoid an error messages from being logged as part of the process: io_ti: probe of 1-1.3:1.0 failed with error -5 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 8dab8e5e339e..c02808a30436 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1499,8 +1499,7 @@ static int do_boot_mode(struct edgeport_serial *serial, dev_dbg(dev, "%s - Download successful -- Device rebooting...\n", __func__); - /* return an error on purpose */ - return -ENODEV; + return 1; } stayinbootmode: -- GitLab From af776953108b860bf6b4ccb7040969b93fb75080 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Fri, 11 Nov 2016 03:00:09 +0200 Subject: [PATCH 0287/1442] mei: fix parameter rename KDoc commit 967b274e02e18c9fbb4d19b96a89bd0afbc77b7a upstream. Parameter renaming to fop_type was not reflected in KDoc Fixes: 3030dc0564594 (mei: add wrapper for queuing control commands) Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index f999a8d3c9c4..6d948799d8c5 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -425,7 +425,7 @@ static inline void mei_io_list_free(struct mei_cl_cb *list, struct mei_cl *cl) * * @cl: host client * @length: size of the buffer - * @type: operation type + * @fop_type: operation type * @fp: associated file pointer (might be NULL) * * Return: cb on success and NULL on failure @@ -459,7 +459,7 @@ struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length, * * @cl: host client * @length: size of the buffer - * @type: operation type + * @fop_type: operation type * @fp: associated file pointer (might be NULL) * * Return: cb on success and NULL on failure -- GitLab From 5d46c4e9a05e4e6cebddf49f7483127390306f5c Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 14 Dec 2016 17:56:51 +0200 Subject: [PATCH 0288/1442] mei: bus: fix mei_cldev_enable KDoc commit 5026c9cb0744a9cd40242743ca91a5d712f468c6 upstream. Adjust function name in KDoc. Fixes: d49dc5e76fc9 (mei: bus: use mei_cldev_ prefix for the API functions) Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 8cac7ef9ad0d..dbe676de7a19 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -408,7 +408,7 @@ bool mei_cldev_enabled(struct mei_cl_device *cldev) EXPORT_SYMBOL_GPL(mei_cldev_enabled); /** - * mei_cldev_enable_device - enable me client device + * mei_cldev_enable - enable me client device * create connection with me client * * @cldev: me client device -- GitLab From fc322290fc06e53aa515f7db575f9a6c3eb84029 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 14 Dec 2016 17:56:52 +0200 Subject: [PATCH 0289/1442] mei: move write cb to completion on credentials failures commit e09ee853c92011860a4bd2fbdf6126f60fc16bd3 upstream. The credentials handling was pushed to the write handlers but error handling wasn't done properly. Move write callbacks to completion queue to destroy them and to notify a blocked writer about the failure Fixes: 136698e535cd1 (mei: push credentials inside the irq write handler) Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 6d948799d8c5..e2af61f7e3b6 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -1536,7 +1536,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, rets = first_chunk ? mei_cl_tx_flow_ctrl_creds(cl) : 1; if (rets < 0) - return rets; + goto err; if (rets == 0) { cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); @@ -1570,11 +1570,8 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, cb->buf.size, cb->buf_idx); rets = mei_write_message(dev, &mei_hdr, buf->data + cb->buf_idx); - if (rets) { - cl->status = rets; - list_move_tail(&cb->list, &cmpl_list->list); - return rets; - } + if (rets) + goto err; cl->status = 0; cl->writing_state = MEI_WRITING; @@ -1582,14 +1579,21 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, cb->completed = mei_hdr.msg_complete == 1; if (first_chunk) { - if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) - return -EIO; + if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) { + rets = -EIO; + goto err; + } } if (mei_hdr.msg_complete) list_move_tail(&cb->list, &dev->write_waiting_list.list); return 0; + +err: + cl->status = rets; + list_move_tail(&cb->list, &cmpl_list->list); + return rets; } /** -- GitLab From a673f99884e355284eba0d6ffa82bc8ec5365554 Mon Sep 17 00:00:00 2001 From: Eva Rachel Retuya Date: Sun, 9 Oct 2016 00:05:39 +0800 Subject: [PATCH 0290/1442] staging: iio: ad7606: fix improper setting of oversampling pins commit b321a38d2407c7e425c54bc09be909a34e49f740 upstream. The oversampling ratio is controlled using the oversampling pins, OS [2:0] with OS2 being the MSB control bit, and OS0 the LSB control bit. The gpio connected to the OS2 pin is not being set correctly, only OS0 and OS1 pins are being set. Fix the typo to allow proper control of the oversampling pins. Signed-off-by: Eva Rachel Retuya Fixes: b9618c0 ("staging: IIO: ADC: New driver for AD7606/AD7606-6/AD7606-4") Acked-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/adc/ad7606_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/adc/ad7606_core.c b/drivers/staging/iio/adc/ad7606_core.c index f79ee61851f6..cbd6bc52050f 100644 --- a/drivers/staging/iio/adc/ad7606_core.c +++ b/drivers/staging/iio/adc/ad7606_core.c @@ -189,7 +189,7 @@ static ssize_t ad7606_store_oversampling_ratio(struct device *dev, mutex_lock(&indio_dev->mlock); gpio_set_value(st->pdata->gpio_os0, (ret >> 0) & 1); gpio_set_value(st->pdata->gpio_os1, (ret >> 1) & 1); - gpio_set_value(st->pdata->gpio_os1, (ret >> 2) & 1); + gpio_set_value(st->pdata->gpio_os2, (ret >> 2) & 1); st->oversampling = lval; mutex_unlock(&indio_dev->mlock); -- GitLab From 96a0c8ee7ce6a32c455ff87d4fc4a69fc87c70d1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 30 Dec 2016 23:54:18 +0100 Subject: [PATCH 0291/1442] iio: accel: st_accel: fix LIS3LV02 reading and scaling commit 65e4345c8ef8811bbb4860fe5f2df10646b7f2e1 upstream. The LIS3LV02 has a special bit that need to be set to get the read values left aligned. Before this patch we get gibberish like this: iio_generic_buffer -a -c10 -n lis3lv02dl_accel (...) 0.000000 -0.010042 -0.642688 19155832931907 0.000000 -0.010042 -0.642688 19155858751073 Which is because we read a raw value for 1g as 64 which is the nominal 1024 for 1g shifted 4 bits to the left by being right-aligned rather than left aligned. Since all other sensors are left aligned, add some code to set the special DAS (data alignment setting) bit to 1 so that the right value is now read like this: iio_generic_buffer -a -c10 -n lis3lv02dl_accel (...) 0.000000 -0.147095 -10.120135 24761614364956 -0.029419 -0.176514 -10.120135 24761631624540 The scaling was weird as well: we have a gain of 1000 for 1g and 3000 for 6g. I don't even remember how I came up with the old values but they are wrong. Fixes: 3acddf74f807 ("iio: st-sensors: add support for lis3lv02d accelerometer") Cc: Lorenzo Bianconi Cc: Giuseppe Barba Cc: Denis Ciocca Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/st_accel_core.c | 12 ++++++++++-- drivers/iio/common/st_sensors/st_sensors_core.c | 9 +++++++++ include/linux/iio/common/st_sensors.h | 12 ++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index ce69048c88e9..3a557e3181ea 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -154,8 +154,8 @@ #define ST_ACCEL_4_FS_MASK 0x80 #define ST_ACCEL_4_FS_AVL_2_VAL 0X00 #define ST_ACCEL_4_FS_AVL_6_VAL 0X01 -#define ST_ACCEL_4_FS_AVL_2_GAIN IIO_G_TO_M_S_2(1024) -#define ST_ACCEL_4_FS_AVL_6_GAIN IIO_G_TO_M_S_2(340) +#define ST_ACCEL_4_FS_AVL_2_GAIN IIO_G_TO_M_S_2(1000) +#define ST_ACCEL_4_FS_AVL_6_GAIN IIO_G_TO_M_S_2(3000) #define ST_ACCEL_4_BDU_ADDR 0x21 #define ST_ACCEL_4_BDU_MASK 0x40 #define ST_ACCEL_4_DRDY_IRQ_ADDR 0x21 @@ -346,6 +346,14 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .addr = ST_ACCEL_1_BDU_ADDR, .mask = ST_ACCEL_1_BDU_MASK, }, + /* + * Data Alignment Setting - needs to be set to get + * left-justified data like all other sensors. + */ + .das = { + .addr = 0x21, + .mask = 0x01, + }, .drdy_irq = { .addr = ST_ACCEL_1_DRDY_IRQ_ADDR, .mask_int1 = ST_ACCEL_1_DRDY_IRQ_INT1_MASK, diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index d5cf7f31eaf9..79c8c7cd70d5 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -401,6 +401,15 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, return err; } + /* set DAS */ + if (sdata->sensor_settings->das.addr) { + err = st_sensors_write_data_with_mask(indio_dev, + sdata->sensor_settings->das.addr, + sdata->sensor_settings->das.mask, 1); + if (err < 0) + return err; + } + if (sdata->int_pin_open_drain) { dev_info(&indio_dev->dev, "set interrupt line to open drain mode\n"); diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 228bd44efa4c..497f2b3a5a62 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -115,6 +115,16 @@ struct st_sensor_bdu { u8 mask; }; +/** + * struct st_sensor_das - ST sensor device data alignment selection + * @addr: address of the register. + * @mask: mask to write the das flag for left alignment. + */ +struct st_sensor_das { + u8 addr; + u8 mask; +}; + /** * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt * @addr: address of the register. @@ -185,6 +195,7 @@ struct st_sensor_transfer_function { * @enable_axis: Enable one or more axis of the sensor. * @fs: Full scale register and full scale list available. * @bdu: Block data update register. + * @das: Data Alignment Selection register. * @drdy_irq: Data ready register of the sensor. * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read. * @bootime: samples to discard when sensor passing from power-down to power-up. @@ -200,6 +211,7 @@ struct st_sensor_settings { struct st_sensor_axis enable_axis; struct st_sensor_fullscale fs; struct st_sensor_bdu bdu; + struct st_sensor_das das; struct st_sensor_data_ready_irq drdy_irq; bool multi_read_bit; unsigned int bootime; -- GitLab From 5f7fd4d2f94d41ec3a7d31c5731e70a6d200c74f Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 20 Dec 2016 13:57:32 +0200 Subject: [PATCH 0292/1442] usb: dwc3: ep0: add dwc3_ep0_prepare_one_trb() commit 7931ec86c1b738e4e90e58c6d95e5f720d45ee56 upstream. For now this is just a cleanup patch, no functional changes. We will be using the new function to fix a bug introduced long ago by commit 0416e494ce7d ("usb: dwc3: ep0: correct cache sync issue in case of ep0_bounced") and further worsened by commit c0bd5456a470 ("usb: dwc3: ep0: handle non maxpacket aligned transfers > 512") Reported-by: Janusz Dziedzic Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index fe79d771dee4..7c05e64b3ef7 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -55,20 +55,13 @@ static const char *dwc3_ep0_state_string(enum dwc3_ep0_state state) } } -static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, - u32 len, u32 type, bool chain) +static void dwc3_ep0_prepare_one_trb(struct dwc3 *dwc, u8 epnum, + dma_addr_t buf_dma, u32 len, u32 type, bool chain) { - struct dwc3_gadget_ep_cmd_params params; struct dwc3_trb *trb; struct dwc3_ep *dep; - int ret; - dep = dwc->eps[epnum]; - if (dep->flags & DWC3_EP_BUSY) { - dwc3_trace(trace_dwc3_ep0, "%s still busy", dep->name); - return 0; - } trb = &dwc->ep0_trb[dep->trb_enqueue]; @@ -89,15 +82,28 @@ static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, trb->ctrl |= (DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_LST); - if (chain) + trace_dwc3_prepare_trb(dep, trb); +} + +static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, + u32 len, u32 type, bool chain) +{ + struct dwc3_gadget_ep_cmd_params params; + struct dwc3_ep *dep; + int ret; + + dep = dwc->eps[epnum]; + if (dep->flags & DWC3_EP_BUSY) { + dwc3_trace(trace_dwc3_ep0, "%s still busy", dep->name); return 0; + } + + dwc3_ep0_prepare_one_trb(dwc, epnum, buf_dma, len, type, chain); memset(¶ms, 0, sizeof(params)); params.param0 = upper_32_bits(dwc->ep0_trb_addr); params.param1 = lower_32_bits(dwc->ep0_trb_addr); - trace_dwc3_prepare_trb(dep, trb); - ret = dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_STARTTRANSFER, ¶ms); if (ret < 0) { dwc3_trace(trace_dwc3_ep0, "%s STARTTRANSFER failed", -- GitLab From c008309f53e57af31c4a7210b4e2c68b1412c899 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 20 Dec 2016 14:08:48 +0200 Subject: [PATCH 0293/1442] usb: dwc3: ep0: explicitly call dwc3_ep0_prepare_one_trb() commit 19ec31230eb3084431bc2e565fd085f79f564274 upstream. Let's call dwc3_ep0_prepare_one_trb() explicitly because there are occasions where we will need more than one TRB to handle an EP0 transfer. A follow-up patch will fix one bug related to multiple-TRB Data Phases when it comes to mapping/unmapping requests for DMA. Reported-by: Janusz Dziedzic Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 7c05e64b3ef7..2331469f943d 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -85,8 +85,7 @@ static void dwc3_ep0_prepare_one_trb(struct dwc3 *dwc, u8 epnum, trace_dwc3_prepare_trb(dep, trb); } -static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, - u32 len, u32 type, bool chain) +static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum) { struct dwc3_gadget_ep_cmd_params params; struct dwc3_ep *dep; @@ -98,8 +97,6 @@ static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma, return 0; } - dwc3_ep0_prepare_one_trb(dwc, epnum, buf_dma, len, type, chain); - memset(¶ms, 0, sizeof(params)); params.param0 = upper_32_bits(dwc->ep0_trb_addr); params.param1 = lower_32_bits(dwc->ep0_trb_addr); @@ -314,8 +311,9 @@ void dwc3_ep0_out_start(struct dwc3 *dwc) { int ret; - ret = dwc3_ep0_start_trans(dwc, 0, dwc->ctrl_req_addr, 8, + dwc3_ep0_prepare_one_trb(dwc, 0, dwc->ctrl_req_addr, 8, DWC3_TRBCTL_CONTROL_SETUP, false); + ret = dwc3_ep0_start_trans(dwc, 0); WARN_ON(ret < 0); } @@ -886,9 +884,9 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, dwc->ep0_next_event = DWC3_EP0_COMPLETE; - ret = dwc3_ep0_start_trans(dwc, epnum, - dwc->ctrl_req_addr, 0, - DWC3_TRBCTL_CONTROL_DATA, false); + dwc3_ep0_prepare_one_trb(dwc, epnum, dwc->ctrl_req_addr, + 0, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, epnum); WARN_ON(ret < 0); } } @@ -972,9 +970,10 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, req->direction = !!dep->number; if (req->request.length == 0) { - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ctrl_req_addr, 0, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } else if (!IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) && (dep->number == 0)) { u32 transfer_size = 0; @@ -992,7 +991,7 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, if (req->request.length > DWC3_EP0_BOUNCE_SIZE) { transfer_size = ALIGN(req->request.length - maxpacket, maxpacket); - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, req->request.dma, transfer_size, DWC3_TRBCTL_CONTROL_DATA, @@ -1004,9 +1003,10 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, dwc->ep0_bounced = true; - ret = dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ep0_bounce_addr, transfer_size, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } else { ret = usb_gadget_map_request(&dwc->gadget, &req->request, dep->number); @@ -1015,9 +1015,10 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, return; } - ret = dwc3_ep0_start_trans(dwc, dep->number, req->request.dma, + dwc3_ep0_prepare_one_trb(dwc, dep->number, req->request.dma, req->request.length, DWC3_TRBCTL_CONTROL_DATA, false); + ret = dwc3_ep0_start_trans(dwc, dep->number); } WARN_ON(ret < 0); @@ -1031,8 +1032,9 @@ static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) type = dwc->three_stage_setup ? DWC3_TRBCTL_CONTROL_STATUS3 : DWC3_TRBCTL_CONTROL_STATUS2; - return dwc3_ep0_start_trans(dwc, dep->number, + dwc3_ep0_prepare_one_trb(dwc, dep->number, dwc->ctrl_req_addr, 0, type, false); + return dwc3_ep0_start_trans(dwc, dep->number); } static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep) -- GitLab From 5af6f56bb16ca7a90220f2757cbfa05e02d11976 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 20 Dec 2016 14:14:40 +0200 Subject: [PATCH 0294/1442] usb: dwc3: gadget: always unmap EP0 requests commit d62145929992f331fdde924d5963ab49588ccc7d upstream. commit 0416e494ce7d ("usb: dwc3: ep0: correct cache sync issue in case of ep0_bounced") introduced a bug where we would leak DMA resources which would cause us to starve the system of them resulting in failing DMA transfers. Fix the bug by making sure that we always unmap EP0 requests since those are *always* mapped. Fixes: 0416e494ce7d ("usb: dwc3: ep0: correct cache sync issue in case of ep0_bounced") Cc: Tested-by: Tomasz Medrek Reported-by: Janusz Dziedzic Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 7eff0412e3bb..d2b860ebfe13 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -182,11 +182,11 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, if (req->request.status == -EINPROGRESS) req->request.status = status; - if (dwc->ep0_bounced && dep->number == 0) + if (dwc->ep0_bounced && dep->number <= 1) dwc->ep0_bounced = false; - else - usb_gadget_unmap_request(&dwc->gadget, &req->request, - req->direction); + + usb_gadget_unmap_request(&dwc->gadget, &req->request, + req->direction); trace_dwc3_gadget_giveback(req); -- GitLab From 8b4879154a67442705bb1af6610832ee06689785 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Oct 2016 22:29:53 +0100 Subject: [PATCH 0295/1442] drm/i915/dp: add lane_count check in intel_dp_check_link_status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d4cb3fd9b548b8bfe2a712ec920b9ebabd3547ab upstream. Currently it's entirely possible to go through the link training step without first determining the lane_count, which is silly since we end up doing a bunch of aux transfers of size = 0, as highlighted by WARN_ON(!msg->buffer != !msg->size), and can only ever result in a 'failed to update link training' message. This can be observed during intel_dp_long_pulse where we can do the link training step, but before we have had a chance to set the link params. To avoid this we add an extra check for the lane_count in intel_dp_check_link_status, which should prevent us from doing the link training step prematurely. v2: add WARN_ON_ONCE and FIXME comment (Ville) References: https://bugs.freedesktop.org/show_bug.cgi?id=97344 Cc: Ville Syrjälä Cc: Mika Kahola Signed-off-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/1476912593-10019-1-git-send-email-matthew.auld@intel.com Signed-off-by: Ville Syrjälä Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 13ae67cc6b8a..45e99d571c9e 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4018,6 +4018,11 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!to_intel_crtc(intel_encoder->base.crtc)->active) return; + /* FIXME: we need to synchronize this sort of stuff with hardware + * readout */ + if (WARN_ON_ONCE(!intel_dp->lane_count)) + return; + /* if link training is requested we should perform it always */ if ((intel_dp->compliance_test_type == DP_TEST_LINK_TRAINING) || (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) { -- GitLab From 1d9c33f1b45ccc1dfbf55c97d7e1d02155e05787 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 13 Dec 2016 20:54:14 +0100 Subject: [PATCH 0296/1442] drm/i915: tune down the fast link training vs boot fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2c57b18adb93fc070039538f1ce375d3d3e99bbb upstream. It's been unfixed since a while and no one is immediately working on this. And we have the FIXME already. And now also a task in the DP team's backlog. Cc: Linus Torvalds Cc: stable@vger.kernel.org Cc: Ville Syrjälä References: https://lists.freedesktop.org/archives/intel-gfx/2016-July/101951.html Acked-by: Ville Syrjälä [danvet: Adjust comment per Ville's feedback.] Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161213195414.28923-1-daniel.vetter@ffwll.ch Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2dd85aeb5bc99e3763dd192cdb95ff405a102c8a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 45e99d571c9e..055525013d2f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4019,8 +4019,8 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) return; /* FIXME: we need to synchronize this sort of stuff with hardware - * readout */ - if (WARN_ON_ONCE(!intel_dp->lane_count)) + * readout. Currently fast link training doesn't work on boot-up. */ + if (!intel_dp->lane_count) return; /* if link training is requested we should perform it always */ -- GitLab From cd84516473a78a6932b1e034e71742e14842e8dc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 Oct 2016 23:12:08 +0300 Subject: [PATCH 0297/1442] mac80211: fix tid_agg_rx NULL dereference commit 1c3d185a9a0b136a58e73b02912d593d0303d1da upstream. On drivers setting the SUPPORTS_REORDERING_BUFFER hardware flag, we crash when the peer sends an AddBA request while we already have a session open on the seame TID; this is because on those drivers, the tid_agg_rx is left NULL even though the session is valid, and the agg_session_valid bit is set. To fix this, store the dialog tokens outside the tid_agg_rx to be able to compare them to the received AddBA request. Fixes: f89e07d4cf26 ("mac80211: agg-rx: refuse ADDBA Request with timeout update") Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-rx.c | 8 ++------ net/mac80211/debugfs_sta.c | 2 +- net/mac80211/sta_info.h | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f6749dced021..3b5fd4188f2a 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -315,11 +315,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, mutex_lock(&sta->ampdu_mlme.mtx); if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { - tid_agg_rx = rcu_dereference_protected( - sta->ampdu_mlme.tid_rx[tid], - lockdep_is_held(&sta->ampdu_mlme.mtx)); - - if (tid_agg_rx->dialog_token == dialog_token) { + if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { ht_dbg_ratelimited(sta->sdata, "updated AddBA Req from %pM on tid %u\n", sta->sta.addr, tid); @@ -396,7 +392,6 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, } /* update data */ - tid_agg_rx->dialog_token = dialog_token; tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; tid_agg_rx->buf_size = buf_size; @@ -418,6 +413,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, if (status == WLAN_STATUS_SUCCESS) { __set_bit(tid, sta->ampdu_mlme.agg_session_valid); __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); + sta->ampdu_mlme.tid_rx_token[tid] = dialog_token; } mutex_unlock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a2fcdb47a0e6..14ec63a02669 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -205,7 +205,7 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - tid_rx ? tid_rx->dialog_token : 0); + tid_rx ? sta->ampdu_mlme.tid_rx_token[i] : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", tid_rx ? tid_rx->ssn : 0); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ed5fcb984a01..dd06ef0b8861 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -184,7 +184,6 @@ struct tid_ampdu_tx { * @ssn: Starting Sequence Number expected to be aggregated. * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). - * @dialog_token: dialog token for aggregation session * @rcu_head: RCU head used for freeing this struct * @reorder_lock: serializes access to reorder buffer, see below. * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and @@ -213,7 +212,6 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; - u8 dialog_token; bool auto_seq; bool removed; }; @@ -225,6 +223,7 @@ struct tid_ampdu_rx { * to tid_tx[idx], which are protected by the sta spinlock) * tid_start_tx is also protected by sta->lock. * @tid_rx: aggregation info for Rx per TID -- RCU protected + * @tid_rx_token: dialog tokens for valid aggregation sessions * @tid_rx_timer_expired: bitmap indicating on which TIDs the * RX timer expired until the work for it runs * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the @@ -243,6 +242,7 @@ struct sta_ampdu_mlme { struct mutex mtx; /* rx */ struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; + u8 tid_rx_token[IEEE80211_NUM_TIDS]; unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; -- GitLab From 45816391e1a5f32e25b8eb2f0c18033ea4b4b6a4 Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Date: Fri, 2 Dec 2016 23:59:08 +0200 Subject: [PATCH 0298/1442] nl80211: Use different attrs for BSSID and random MAC addr in scan req commit 2fa436b3a2a7009c11a3bc03fe0ff4c26e80fd87 upstream. NL80211_ATTR_MAC was used to set both the specific BSSID to be scanned and the random MAC address to be used when privacy is enabled. When both the features are enabled, both the BSSID and the local MAC address were getting same value causing Probe Request frames to go with unintended DA. Hence, this has been fixed by using a different NL80211_ATTR_BSSID attribute to set the specific BSSID (which was the more recent addition in cfg80211) for a scan. Backwards compatibility with old userspace software is maintained to some extent by allowing NL80211_ATTR_MAC to be used to set the specific BSSID when scanning without enabling random MAC address use. Scanning with random source MAC address was introduced by commit ad2b26abc157 ("cfg80211: allow drivers to support random MAC addresses for scan") and the issue was introduced with the addition of the second user for the same attribute in commit 818965d39177 ("cfg80211: Allow a scan request for a specific BSSID"). Fixes: 818965d39177 ("cfg80211: Allow a scan request for a specific BSSID") Signed-off-by: Vamsi Krishna Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/nl80211.h | 7 ++++++- net/wireless/nl80211.c | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 56368e9b4622..d3cbe48b286d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -323,7 +323,7 @@ * @NL80211_CMD_GET_SCAN: get scan results * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the - * probe requests at CCK rate or not. %NL80211_ATTR_MAC can be used to + * probe requests at CCK rate or not. %NL80211_ATTR_BSSID can be used to * specify a BSSID to scan for; if not included, the wildcard BSSID will * be used. * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to @@ -1937,6 +1937,9 @@ enum nl80211_commands { * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. * See &enum nl80211_nan_match_attributes. * + * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also + * used in various commands/events for specifying the BSSID. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2336,6 +2339,8 @@ enum nl80211_attrs { NL80211_ATTR_NAN_FUNC, NL80211_ATTR_NAN_MATCH, + NL80211_ATTR_BSSID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c510810f0b7c..a2dd6edaae37 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -414,6 +414,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, + [NL80211_ATTR_BSSID] = { .len = ETH_ALEN }, }; /* policy for the key attributes */ @@ -6677,7 +6678,20 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); - if (info->attrs[NL80211_ATTR_MAC]) + /* Initial implementation used NL80211_ATTR_MAC to set the specific + * BSSID to scan for. This was problematic because that same attribute + * was already used for another purpose (local random MAC address). The + * NL80211_ATTR_BSSID attribute was added to fix this. For backwards + * compatibility with older userspace components, also use the + * NL80211_ATTR_MAC value here if it can be determined to be used for + * the specific BSSID use case instead of the random MAC address + * (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use). + */ + if (info->attrs[NL80211_ATTR_BSSID]) + memcpy(request->bssid, + nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN); + else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) && + info->attrs[NL80211_ATTR_MAC]) memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]), ETH_ALEN); else -- GitLab From a24f1f3520e61d46ca7867871d12345b4e5c2215 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Wed, 9 Nov 2016 03:40:57 +0200 Subject: [PATCH 0299/1442] ath10k: fix failure to send NULL func frame for 10.4 commit fcf7cf1551cae54e747a771f5808240f2a37708f upstream. This partially reverts 'commit 2cdce425aa33 ("ath10k: Fix broken NULL func data frame status for 10.4")' Unfortunately this breaks sending NULL func and the existing issue of obtaining proper tx status for NULL function will be fixed. Also update the comments for feature flag added to be useless and not working Fixes: 2cdce425aa33 "ath10k: Fix broken NULL func data frame status for 10.4" Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/core.h | 6 ++---- drivers/net/wireless/ath/ath10k/mac.c | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 521f1c55c19e..be5b527472f9 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -557,10 +557,8 @@ enum ath10k_fw_features { */ ATH10K_FW_FEATURE_BTCOEX_PARAM = 14, - /* Older firmware with HTT delivers incorrect tx status for null func - * frames to driver, but this fixed in 10.2 and 10.4 firmware versions. - * Also this workaround results in reporting of incorrect null func - * status for 10.4. This flag is used to skip the workaround. + /* Unused flag and proven to be not working, enable this if you want + * to experiment sending NULL func data frames in HTT TX */ ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR = 15, diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 90eeb1c82e8b..f2e85eb22afe 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3255,8 +3255,6 @@ ath10k_mac_tx_h_get_txmode(struct ath10k *ar, if (ar->htt.target_version_major < 3 && (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && !test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, - ar->running_fw->fw_file.fw_features) && - !test_bit(ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR, ar->running_fw->fw_file.fw_features)) return ATH10K_HW_TXRX_MGMT; -- GitLab From ac86312e0870ad6d35b97cc79964acbf58e99424 Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Tue, 15 Nov 2016 22:02:11 +0900 Subject: [PATCH 0300/1442] mfd: tps65217: Fix page fault on unloading modules commit 40a50f8b307de8d08f3fa37c312fc16a7dd233e5 upstream. TPS65217 IRQ domain should be removed and initialised as NULL when the module is unloaded for the next use. When tps65217.ko is loaded again, it causes the page fault. This patch fixes the error below. root@arm:~# lsmod | grep "tps" tps65217_charger 3538 0 tps65218_pwrbutton 2974 0 tps65217 6710 1 tps65217_charger root@arm:~# modprobe -r tps65217_charger root@arm:~# modprobe tps65217.ko [ 71.990277] Unable to handle kernel paging request at virtual address bf055944 [ 71.998063] pgd = dd3a4000 [ 72.000904] [bf055944] *pgd=9e6f7811, *pte=00000000, *ppte=00000000 [ 72.007567] Internal error: Oops: 7 [#1] SMP ARM [ 72.012404] Modules linked in: tps65217(+) evdev musb_dsps musb_hdrc udc_core tps65218_pwrbutton usbcore phy_am335] [ 72.055700] CPU: 0 PID: 243 Comm: modprobe Not tainted 4.9.0-rc5-next-20161114 #3 [ 72.063531] Hardware name: Generic AM33XX (Flattened Device Tree) [ 72.069899] task: de714380 task.stack: de7e6000 [ 72.074655] PC is at irq_find_matching_fwspec+0x88/0x100 [ 72.080211] LR is at 0xde7e79d8 [ 72.083496] pc : [] lr : [] psr: 200e0013 [ 72.083496] sp : de7e7a78 ip : 00000000 fp : dd138a68 [ 72.095506] r10: c0ca04f8 r9 : 00000018 r8 : de7e7ab8 [ 72.100973] r7 : 00000001 r6 : c0c4517c r5 : df963f68 r4 : de321980 [ 72.107797] r3 : bf055940 r2 : de714380 r1 : 00000000 r0 : 00000000 [ 72.114633] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 72.122084] Control: 10c5387d Table: 9d3a4019 DAC: 00000051 [ 72.128097] Process modprobe (pid: 243, stack limit = 0xde7e6218) [ 72.134489] Stack: (0xde7e7a78 to 0xde7e8000) [ 72.139060] 7a60: df963f68 de7e7ab8 [ 72.147643] 7a80: 00000000 dd0e1000 dd491e20 c01a6ea0 600e0013 c01a5dc0 dd138a68 c0c45138 [ 72.156216] 7aa0: df963f68 00000000 df963f68 dd0e1010 00000000 c01a71a4 df963f68 00000001 [ 72.164800] 7ac0: 00000002 de7e7ac0 c80048b8 dd0adf00 df963f68 c0c4517c 00000000 de7e7b50 [ 72.173369] 7ae0: 00000018 c0ca04f8 dd138a68 c01a5dc0 df963f68 dd0e1010 00000000 dd0e1000 [ 72.181942] 7b00: dd491e20 c0653a70 df963f58 00000001 00000002 00000000 00000000 00000000 [ 72.190522] 7b20: 600e0093 c0cbf8f0 c0c0512c c0193674 00000001 00000080 00000000 c0554984 [ 72.199096] 7b40: 00000000 00000000 800e0013 c0553858 df963f68 00000000 00000000 00000000 [ 72.207674] 7b60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 72.216239] 7b80: 00000000 00000000 00000000 00000000 00000000 00000000 dd0e1000 c0544d24 [ 72.224816] 7ba0: dd491e10 dd0e1010 dd16e800 bf1d517c bf1d5620 dd0e1010 c1497ed4 bf1d5620 [ 72.233398] 7bc0: dd0e1010 fffffdfb bf1d5620 bf1d5620 00000000 c054537c c0545330 dd0e1010 [ 72.241967] 7be0: c1497ed4 00000000 bf1d5620 c05433ac 00000000 00000000 de7e7c28 c0543570 [ 72.250537] 7c00: 00000001 c1497e90 00000000 c0541884 de080cd4 dd44b7d4 dd0e1010 dd0e1010 [ 72.259109] 7c20: dd0e1044 c05430c8 dd0e1010 00000001 dd0e1010 dd0e1018 dd0e1010 c0c9e328 [ 72.267676] 7c40: de5d4020 c0542760 dd0e1018 dd0e1010 00000000 c0540ba8 dd138a40 c048dec4 [ 72.276253] 7c60: 00000000 dd0e1000 00000001 dd0e1000 dd0e1010 dd0e1000 bf233de0 dd138a40 [ 72.284829] 7c80: dd0e1010 c05450a0 000000bf 00000000 dd138a60 00000001 dd0e1000 c0571240 [ 72.293398] 7ca0: 00000000 dd1ce9c0 00000040 dd1ce9cc bf233de0 00000003 de5d4020 ffffffff [ 72.301969] 7cc0: 00000004 dd0adf00 00000000 c0571408 00000000 00000000 dd0adf00 de5d4020 [ 72.310543] 7ce0: c057146c dd1ce9c0 bf233d14 de5d4020 de7fb3d0 00000004 bf233d14 ffffffff [ 72.319120] 7d00: 00000018 dd49bf30 c01cedc0 c05714d0 00000000 00000000 dd0adf00 de322810 [ 72.327692] 7d20: de322810 00000000 dd033000 000000f0 00000001 bf2333fc 00000000 00000000 [ 72.336269] 7d40: dd0adf00 de5d4020 000000b6 bf233e40 de5d4020 bf233968 de5d4004 de5d4000 [ 72.344848] 7d60: bf233314 c06148ac de5d4020 c1497ed4 00000000 bf233e40 00000000 c05433ac [ 72.353422] 7d80: 00000000 de5d4020 bf233e40 de5d4054 00000000 bf236000 00000000 c0543538 [ 72.362002] 7da0: 00000000 bf233e40 c0543484 c05417e4 de1442a4 de5d04d0 bf233e40 de321300 [ 72.370582] 7dc0: c0caa5a4 c05429fc bf233be0 bf233e40 c0cbfa44 bf233e40 c0cbfa44 dd2f7740 [ 72.379148] 7de0: bf233f00 c05442f0 bf233e8c bf233e24 c0cbfa44 c0615ae0 00000000 bf233f00 [ 72.387718] 7e00: c0cbfa44 c010186c 200f0013 c0191650 de714380 00000000 600f0013 00000040 [ 72.396286] 7e20: dd2f7740 c018f1ac 00000001 c0c8356c 024000c0 c01a8854 c0c56e0e c028225c [ 72.404863] 7e40: dd2f7740 c0191984 de714380 dd2f7740 00000001 bf233f00 bf233f00 c0cbfa44 [ 72.413440] 7e60: dd2f7740 bf233f00 00000001 dd49bf08 dd49bf30 c0230998 00000001 c0c8356c [ 72.421997] 7e80: c0c4c536 c0cbfa44 c0c0512c c01d2070 bf233f0c 00007fff bf233f00 c01cf5b8 [ 72.430570] 7ea0: 00000000 c1475134 c01cee34 bf23411c bf233f48 bf234054 bf234150 00000000 [ 72.439144] 7ec0: 024002c2 de7fbf40 0009bc20 c02776ac ff800000 00000000 00000000 bf233670 [ 72.447723] 7ee0: 00000004 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 72.456298] 7f00: 00000000 00000000 00000000 00000000 c01d2590 0000aa41 00000000 00000000 [ 72.464862] 7f20: 000b2549 e12c3a41 00000051 de7e6000 0009bc20 c01d2630 00000530 e12b9000 [ 72.473438] 7f40: 0000aa41 e12c1434 e12c1211 e12c336c 00001150 00001620 00000000 00000000 [ 72.482003] 7f60: 00000000 000010fc 00000035 00000036 0000001d 0000001a 00000017 00000000 [ 72.490564] 7f80: de7e6000 3ba39a00 0009b008 0009b718 00000080 c0107704 de7e6000 00000000 [ 72.499141] 7fa0: 0009f609 c0107560 3ba39a00 0009b008 000a7b08 0000aa41 0009bc20 0000aa41 [ 72.507717] 7fc0: 3ba39a00 0009b008 0009b718 00000080 00000001 00000008 0009ab14 0009f609 [ 72.516290] 7fe0: bea31ab8 bea31aa8 0001e5eb b6e83b42 800f0030 000a7b08 0000ffff 0840ffff [ 72.524883] [] (irq_find_matching_fwspec) from [] (irq_create_fwspec_mapping+0x28/0x2e0) [ 72.535174] [] (irq_create_fwspec_mapping) from [] (irq_create_of_mapping+0x4c/0x54) [ 72.545115] [] (irq_create_of_mapping) from [] (of_irq_get+0x58/0x68) [ 72.553699] [] (of_irq_get) from [] (platform_get_irq+0x1c/0xec) [ 72.561828] [] (platform_get_irq) from [] (tps6521x_pb_probe+0xd0/0x1a8 [tps65218_pwrbutton]) [ 72.572581] [] (tps6521x_pb_probe [tps65218_pwrbutton]) from [] (platform_drv_probe+0x4c/0xac) [ 72.583426] [] (platform_drv_probe) from [] (driver_probe_device+0x204/0x2dc) [ 72.592729] [] (driver_probe_device) from [] (bus_for_each_drv+0x58/0x8c) [ 72.601657] [] (bus_for_each_drv) from [] (__device_attach+0xb0/0x114) [ 72.610324] [] (__device_attach) from [] (bus_probe_device+0x88/0x90) [ 72.618898] [] (bus_probe_device) from [] (device_add+0x3b8/0x560) [ 72.627203] [] (device_add) from [] (platform_device_add+0xa8/0x208) [ 72.635693] [] (platform_device_add) from [] (mfd_add_device+0x240/0x338) [ 72.644634] [] (mfd_add_device) from [] (mfd_add_devices+0xa0/0x104) [ 72.653120] [] (mfd_add_devices) from [] (devm_mfd_add_devices+0x60/0xa8) [ 72.662077] [] (devm_mfd_add_devices) from [] (tps65217_probe+0xe8/0x2ec [tps65217]) [ 72.672026] [] (tps65217_probe [tps65217]) from [] (i2c_device_probe+0x168/0x1f4) [ 72.681695] [] (i2c_device_probe) from [] (driver_probe_device+0x204/0x2dc) [ 72.690816] [] (driver_probe_device) from [] (__driver_attach+0xb4/0xb8) [ 72.699657] [] (__driver_attach) from [] (bus_for_each_dev+0x60/0x94) [ 72.708224] [] (bus_for_each_dev) from [] (bus_add_driver+0x18c/0x214) [ 72.716892] [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [ 72.725280] [] (driver_register) from [] (i2c_register_driver+0x38/0x80) [ 72.734120] [] (i2c_register_driver) from [] (do_one_initcall+0x3c/0x178) [ 72.743055] [] (do_one_initcall) from [] (do_init_module+0x5c/0x1d0) [ 72.751537] [] (do_init_module) from [] (load_module+0x1d10/0x21c0) [ 72.759933] [] (load_module) from [] (SyS_init_module+0x110/0x154) [ 72.768242] [] (SyS_init_module) from [] (ret_fast_syscall+0x0/0x1c) [ 72.776725] Code: e5944000 e1540006 0a00001b e594300c (e593c004) [ 72.783181] ---[ end trace 0278ec325f4689b8 ]--- Fixes: 6556bdacf646 ("mfd: tps65217: Add support for IRQs") Signed-off-by: Milo Kim Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/tps65217.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c index 9a4d8684dd32..df2e7756927f 100644 --- a/drivers/mfd/tps65217.c +++ b/drivers/mfd/tps65217.c @@ -424,6 +424,24 @@ static int tps65217_probe(struct i2c_client *client, return 0; } +static int tps65217_remove(struct i2c_client *client) +{ + struct tps65217 *tps = i2c_get_clientdata(client); + unsigned int virq; + int i; + + for (i = 0; i < ARRAY_SIZE(tps65217_irqs); i++) { + virq = irq_find_mapping(tps->irq_domain, i); + if (virq) + irq_dispose_mapping(virq); + } + + irq_domain_remove(tps->irq_domain); + tps->irq_domain = NULL; + + return 0; +} + static const struct i2c_device_id tps65217_id_table[] = { {"tps65217", TPS65217}, { /* sentinel */ } @@ -437,6 +455,7 @@ static struct i2c_driver tps65217_driver = { }, .id_table = tps65217_id_table, .probe = tps65217_probe, + .remove = tps65217_remove, }; static int __init tps65217_init(void) -- GitLab From 15e0355a1ec51e1eaf140dddeaf55d70eb91d183 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sat, 29 Oct 2016 13:17:37 +0200 Subject: [PATCH 0301/1442] ath10k: use the right length of "background" commit 31b239824ece321c09bdb8e61e1d14814eaba38b upstream. The word "background" contains 10 characters so the third argument of strncmp() need to be 10 in order to match this prefix correctly. Signed-off-by: Nicolas Iooss Fixes: 855aed1220d2 ("ath10k: add spectral scan feature") Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/spectral.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/spectral.c b/drivers/net/wireless/ath/ath10k/spectral.c index 7d9b0da1b010..2ffc1fe4923b 100644 --- a/drivers/net/wireless/ath/ath10k/spectral.c +++ b/drivers/net/wireless/ath/ath10k/spectral.c @@ -338,7 +338,7 @@ static ssize_t write_file_spec_scan_ctl(struct file *file, } else { res = -EINVAL; } - } else if (strncmp("background", buf, 9) == 0) { + } else if (strncmp("background", buf, 10) == 0) { res = ath10k_spectral_scan_config(ar, SPECTRAL_BACKGROUND); } else if (strncmp("manual", buf, 6) == 0) { res = ath10k_spectral_scan_config(ar, SPECTRAL_MANUAL); -- GitLab From 4e1dfb0035e1625cb46e229034887bbc4b159d4f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 30 Oct 2016 09:09:18 -0700 Subject: [PATCH 0302/1442] cris: Only build flash rescue image if CONFIG_ETRAX_AXISFLASHMAP is selected commit 328cf6927bb72cadefddebbc9a23c793108147a2 upstream. If CONFIG_ETRAX_AXISFLASHMAP is not configured, the flash rescue image object file is empty. With recent versions of binutils, this results in the following build error. cris-linux-objcopy: error: the input file 'arch/cris/boot/rescue/rescue.o' has no sections This is seen, for example, when trying to build cris:allnoconfig with recently generated toolchains. Since it does not make sense to build a flash rescue image if there is no flash, only build it if CONFIG_ETRAX_AXISFLASHMAP is enabled. Reported-by: kbuild test robot Fixes: 66ab3a74c5ce ("CRIS: Merge machine dependent boot/compressed ..") Signed-off-by: Guenter Roeck Signed-off-by: Jesper Nilsson Signed-off-by: Greg Kroah-Hartman --- arch/cris/boot/rescue/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/cris/boot/rescue/Makefile b/arch/cris/boot/rescue/Makefile index 52bd0bd1dd22..d98edbb30a18 100644 --- a/arch/cris/boot/rescue/Makefile +++ b/arch/cris/boot/rescue/Makefile @@ -10,6 +10,9 @@ asflags-y += $(LINUXINCLUDE) ccflags-y += -O2 $(LINUXINCLUDE) + +ifdef CONFIG_ETRAX_AXISFLASHMAP + arch-$(CONFIG_ETRAX_ARCH_V10) = v10 arch-$(CONFIG_ETRAX_ARCH_V32) = v32 @@ -28,6 +31,11 @@ $(obj)/rescue.bin: $(obj)/rescue.o FORCE $(call if_changed,objcopy) cp -p $(obj)/rescue.bin $(objtree) +else +$(obj)/rescue.bin: + +endif + $(obj)/testrescue.bin: $(obj)/testrescue.o $(OBJCOPY) $(OBJCOPYFLAGS) $(obj)/testrescue.o tr.bin # Pad it to 784 bytes -- GitLab From 47e3472507f0fe563da9291785c1f01693ee22e4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 18 Oct 2016 15:33:18 +0100 Subject: [PATCH 0303/1442] efi/efivar_ssdt_load: Don't return success on allocation failure commit a75dcb5848359f488c32c0aef8711d9bd37a77b8 upstream. We should return -ENOMEM here, instead of success. Signed-off-by: Dan Carpenter Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 475fb4e8b2f4 ("efi / ACPI: load SSTDs from EFI variables") Link: http://lkml.kernel.org/r/20161018143318.15673-9-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 1ac199cd75e7..a4944e22f294 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -259,8 +259,10 @@ static __init int efivar_ssdt_load(void) } data = kmalloc(size, GFP_KERNEL); - if (!data) + if (!data) { + ret = -ENOMEM; goto free_entry; + } ret = efivar_entry_get(entry, NULL, &size, data); if (ret) { -- GitLab From 629138cd28becfed37527a02219150e1c2aa580a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Oct 2016 13:03:38 +0200 Subject: [PATCH 0304/1442] clk: renesas: cpg-mssr: Fix inverted debug check commit bc4725d9029e2c8205fbaf1105e193d1c4e463bb upstream. The intention was to enable the checks if debugging is enabled, not disabled. Fixes: f793d1e51705b276 ("clk: shmobile: Add new CPG/MSSR driver core") Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/clk/renesas/renesas-cpg-mssr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c index e1365e7491ae..25c41cd9cdfc 100644 --- a/drivers/clk/renesas/renesas-cpg-mssr.c +++ b/drivers/clk/renesas/renesas-cpg-mssr.c @@ -33,9 +33,9 @@ #include "clk-div6.h" #ifdef DEBUG -#define WARN_DEBUG(x) do { } while (0) -#else #define WARN_DEBUG(x) WARN_ON(x) +#else +#define WARN_DEBUG(x) do { } while (0) #endif -- GitLab From e641c92fd2ae2c4a54810cae66c161c9cc5b06a4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Oct 2016 22:07:53 +0200 Subject: [PATCH 0305/1442] debugfs: improve DEFINE_DEBUGFS_ATTRIBUTE for !CONFIG_DEBUG_FS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7f847dd31736f1284538e54f46cf10e63929eb7f upstream. The slp_s0_residency_usec debugfs file currently uses DEFINE_DEBUGFS_ATTRIBUTE(), but that macro cannot really be used to define files outside of the debugfs code, as it has no reference to the get/set functions if CONFIG_DEBUG_FS is not defined: drivers/platform/x86/intel_pmc_core.c:80:12: error: ‘pmc_core_dev_state_get’ defined but not used [-Werror=unused-function] This fixes the macro to always contain the reference, and instead rely on the stubbed-out debugfs_create_file to not actually refer to its arguments so the compiler can still drop the reference. This works because the attribute definition is always 'static', and the dead-code removal silently drops all static symbols that are not used. Fixes: c64688081490 ("debugfs: add support for self-protecting attribute file fops") Fixes: df2294fb6428 ("intel_pmc_core: Convert to DEFINE_DEBUGFS_ATTRIBUTE") Signed-off-by: Arnd Bergmann [nicstange@gmail.com: Add dummy implementations of debugfs_attr_read() and debugfs_attr_write() in order to protect against possibly broken dead code elimination and to improve readability. Correct CONFIG_DEBUGFS_FS -> CONFIG_DEBUG_FS typo in changelog.] Signed-off-by: Nicolai Stange Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 44 +++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4d3f0d1aec73..1b413a9aab81 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -62,6 +62,21 @@ static inline const struct file_operations *debugfs_real_fops(struct file *filp) return filp->f_path.dentry->d_fsdata; } +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return simple_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = debugfs_attr_read, \ + .write = debugfs_attr_write, \ + .llseek = generic_file_llseek, \ +} + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, umode_t mode, @@ -99,21 +114,6 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, ssize_t debugfs_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); -#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ -static int __fops ## _open(struct inode *inode, struct file *file) \ -{ \ - __simple_attr_check_format(__fmt, 0ull); \ - return simple_attr_open(inode, file, __get, __set, __fmt); \ -} \ -static const struct file_operations __fops = { \ - .owner = THIS_MODULE, \ - .open = __fops ## _open, \ - .release = simple_attr_release, \ - .read = debugfs_attr_read, \ - .write = debugfs_attr_write, \ - .llseek = generic_file_llseek, \ -} - struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -233,8 +233,18 @@ static inline void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { } -#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ - static const struct file_operations __fops = { 0 } +static inline ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t debugfs_attr_write(struct file *file, + const char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) -- GitLab From bedcab8723cec3bf367ed28b6baa5350d54ef701 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 27 Oct 2016 17:15:15 +0300 Subject: [PATCH 0306/1442] x86/prctl/uapi: Remove #ifdef for CHECKPOINT_RESTORE commit a01aa6c9f40fe03c82032e7f8b3bcf1e6c93ac0e upstream. As userspace knows nothing about kernel config, thus #ifdefs around ABI prctl constants makes them invisible to userspace. Let it be clean'n'simple: remove #ifdefs. If kernel has CONFIG_CHECKPOINT_RESTORE disabled, sys_prctl() will return -EINVAL for those prctls. Reported-by: Paul Bolle Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Cc: oleg@redhat.com Fixes: 2eefd8789698 ("x86/arch_prctl/vdso: Add ARCH_MAP_VDSO_*") Link: http://lkml.kernel.org/r/20161027141516.28447-2-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/prctl.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index ae135de547f5..835aa51c7f6e 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,10 +6,8 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 -#ifdef CONFIG_CHECKPOINT_RESTORE -# define ARCH_MAP_VDSO_X32 0x2001 -# define ARCH_MAP_VDSO_32 0x2002 -# define ARCH_MAP_VDSO_64 0x2003 -#endif +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 #endif /* _ASM_X86_PRCTL_H */ -- GitLab From 36c1bc65d211b929dda601f78c444c228d8df27f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Dec 2016 10:14:42 -0800 Subject: [PATCH 0307/1442] x86/cpu: Probe CPUID leaf 6 even when cpuid_level == 6 commit 3df8d9208569ef0b2313e516566222d745f3b94b upstream. A typo (or mis-merge?) resulted in leaf 6 only being probed if cpuid_level >= 7. Fixes: 2ccd71f1b278 ("x86/cpufeature: Move some of the scattered feature bits to x86_capability") Signed-off-by: Andy Lutomirski Acked-by: Borislav Petkov Cc: Brian Gerst Link: http://lkml.kernel.org/r/6ea30c0e9daec21e488b54761881a6dfcf3e04d0.1481825597.git.luto@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c2048b44851c..dd62708c6a67 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -667,13 +667,14 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_1_EDX] = edx; } + /* Thermal and Power Management Leaf: level 0x00000006 (eax) */ + if (c->cpuid_level >= 0x00000006) + c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); + /* Additional Intel-defined flags: level 0x00000007 */ if (c->cpuid_level >= 0x00000007) { cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_7_0_EBX] = ebx; - - c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); c->x86_capability[CPUID_7_ECX] = ecx; } -- GitLab From 7a13086bd4575738f385d062e3676e3b13af966c Mon Sep 17 00:00:00 2001 From: Micha? K?pie? Date: Fri, 23 Dec 2016 10:00:08 +0100 Subject: [PATCH 0308/1442] platform/x86: fujitsu-laptop: use brightness_set_blocking for LED-setting callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a608a9d52fa4168efd478d684039ed545a69dbcd upstream. All LED-setting functions in fujitsu-laptop are currently assigned to the brightness_set callback, which is incorrect because they can sleep (due to their use of call_fext_func(), which in turn issues ACPI calls) and the documentation (in include/linux/leds.h) clearly states they must not. Assign them to brightness_set_blocking instead and change them to match the expected function prototype. This change makes it possible to use Fujitsu-specific LEDs with "heavy" triggers, like disk-activity or phy0rx. Fixes: 3a407086090b ("fujitsu-laptop: Add BL power, LED control and radio state information") Fixes: 4f62568c1fcf ("fujitsu-laptop: Support radio LED") Fixes: d6b88f64b0d4 ("fujitsu-laptop: Add support for eco LED") Signed-off-by: Michał Kępień Acked-by: Jonathan Woithe Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/fujitsu-laptop.c | 42 +++++++++++++-------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 61f39abf5dc8..82d67715ce76 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -177,43 +177,43 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event); #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); -static void logolamp_set(struct led_classdev *cdev, +static int logolamp_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev logolamp_led = { .name = "fujitsu::logolamp", .brightness_get = logolamp_get, - .brightness_set = logolamp_set + .brightness_set_blocking = logolamp_set }; static enum led_brightness kblamps_get(struct led_classdev *cdev); -static void kblamps_set(struct led_classdev *cdev, +static int kblamps_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev kblamps_led = { .name = "fujitsu::kblamps", .brightness_get = kblamps_get, - .brightness_set = kblamps_set + .brightness_set_blocking = kblamps_set }; static enum led_brightness radio_led_get(struct led_classdev *cdev); -static void radio_led_set(struct led_classdev *cdev, +static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev radio_led = { .name = "fujitsu::radio_led", .brightness_get = radio_led_get, - .brightness_set = radio_led_set + .brightness_set_blocking = radio_led_set }; static enum led_brightness eco_led_get(struct led_classdev *cdev); -static void eco_led_set(struct led_classdev *cdev, +static int eco_led_set(struct led_classdev *cdev, enum led_brightness brightness); static struct led_classdev eco_led = { .name = "fujitsu::eco_led", .brightness_get = eco_led_get, - .brightness_set = eco_led_set + .brightness_set_blocking = eco_led_set }; #endif @@ -267,48 +267,48 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) #if IS_ENABLED(CONFIG_LEDS_CLASS) /* LED class callbacks */ -static void logolamp_set(struct led_classdev *cdev, +static int logolamp_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) { call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON); - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_ON); } else if (brightness >= LED_HALF) { call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_ON); - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_ALWAYS, FUNC_LED_OFF); } else { - call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, LOGOLAMP_POWERON, FUNC_LED_OFF); } } -static void kblamps_set(struct led_classdev *cdev, +static int kblamps_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_ON); else - call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_OFF); + return call_fext_func(FUNC_LEDS, 0x1, KEYBOARD_LAMPS, FUNC_LED_OFF); } -static void radio_led_set(struct led_classdev *cdev, +static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); + return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); else - call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); + return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); } -static void eco_led_set(struct led_classdev *cdev, +static int eco_led_set(struct led_classdev *cdev, enum led_brightness brightness) { int curr; curr = call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0); if (brightness >= LED_FULL) - call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr | ECO_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr | ECO_LED_ON); else - call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr & ~ECO_LED_ON); + return call_fext_func(FUNC_LEDS, 0x1, ECO_LED, curr & ~ECO_LED_ON); } static enum led_brightness logolamp_get(struct led_classdev *cdev) -- GitLab From af3cd3f0a805ab701f1e56ed93c676c77b7d2169 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 7 Nov 2016 17:31:44 -0300 Subject: [PATCH 0309/1442] hwmon: (scpi) Fix module autoload commit 13edb767aa609b6efb7c0c2b57fbd72a6ded0eed upstream. If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/hwmon/scpi-hwmon.ko | grep alias $ After this patch: $ modinfo drivers/hwmon/scpi-hwmon.ko | grep alias alias: of:N*T*Carm,scpi-sensorsC* alias: of:N*T*Carm,scpi-sensors Signed-off-by: Javier Martinez Canillas Fixes: ea98b29a05e9c ("hwmon: Support sensors exported via ARM SCP interface") Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/scpi-hwmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c index 559a3dcd64d8..094f948f99ff 100644 --- a/drivers/hwmon/scpi-hwmon.c +++ b/drivers/hwmon/scpi-hwmon.c @@ -251,6 +251,7 @@ static const struct of_device_id scpi_of_match[] = { {.compatible = "arm,scpi-sensors"}, {}, }; +MODULE_DEVICE_TABLE(of, scpi_of_match); static struct platform_driver scpi_hwmon_platdrv = { .driver = { -- GitLab From 29e7b170db90a852b51409a76870cdbe57341a9b Mon Sep 17 00:00:00 2001 From: Jared Bents Date: Fri, 18 Nov 2016 22:20:38 -0600 Subject: [PATCH 0310/1442] hwmon: (amc6821) sign extension temperature commit 4538bfbf2d9f1fc48c07ac0cc0ee58716fe7fe96 upstream. Converts the unsigned temperature values from the i2c read to be sign extended as defined in the datasheet so that negative temperatures are properly read. Fixes: 28e6274d8fa67 ("hwmon: (amc6821) Avoid forward declaration") Signed-off-by: Jared Bents Signed-off-by: Matt Weber [groeck: Dropped unnecessary continuation line] Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/amc6821.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index 12e851a5af48..46b4e35fd555 100644 --- a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -188,8 +188,8 @@ static struct amc6821_data *amc6821_update_device(struct device *dev) !data->valid) { for (i = 0; i < TEMP_IDX_LEN; i++) - data->temp[i] = i2c_smbus_read_byte_data(client, - temp_reg[i]); + data->temp[i] = (int8_t)i2c_smbus_read_byte_data( + client, temp_reg[i]); data->stat1 = i2c_smbus_read_byte_data(client, AMC6821_REG_STAT1); -- GitLab From a69a6ebd036a6c7edf89db09ca358927c158dbe4 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 20 Nov 2016 10:37:39 -0800 Subject: [PATCH 0311/1442] hwmon: (ds620) Fix overflows seen when writing temperature limits commit e36ce99ee0815d7919a7b589bfb66f3de50b6bc7 upstream. Module test reports: temp1_max: Suspected overflow: [160000 vs. 0] temp1_min: Suspected overflow: [160000 vs. 0] This is seen because the values passed when writing temperature limits are unbound. Reviewed-by: Jean Delvare Fixes: 6099469805c2 ("hwmon: Support for Dallas Semiconductor DS620") Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/ds620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ds620.c b/drivers/hwmon/ds620.c index edf550fc4eef..0043a4c02b85 100644 --- a/drivers/hwmon/ds620.c +++ b/drivers/hwmon/ds620.c @@ -166,7 +166,7 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *da, if (res) return res; - val = (val * 10 / 625) * 8; + val = (clamp_val(val, -128000, 128000) * 10 / 625) * 8; mutex_lock(&data->update_lock); data->temp[attr->index] = val; -- GitLab From 81616a9f751e9b08a90d13c79e6ca73a74c4b446 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 4 Dec 2016 18:15:25 -0800 Subject: [PATCH 0312/1442] hwmon: (nct7802) Fix overflows seen when writing into limit attributes commit c0d04e9112ad59d73f23f3b0f6726c5e798dfcbf upstream. Fix overflows seen when writing voltage and temperature limit attributes. The value passed to DIV_ROUND_CLOSEST() needs to be clamped, and the value parameter passed to nct7802_write_fan_min() is an unsigned long. Also, writing values larger than 2700000 into a fan limit attribute results in writing 0 into the chip's limit registers. The exact behavior when writing this value is unspecified. For consistency, report a limit of 1350000 if the chip register reads 0. This may be wrong, and the chip behavior should be verified with the actual chip, but it is better than reporting a value of 0 (which, when written, results in writing a value of 0x1fff into the chip register). Fixes: 3434f3783580 ("hwmon: Driver for Nuvoton NCT7802Y") Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct7802.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 3ce33d244cc0..12b94b094c0d 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -259,13 +259,15 @@ static int nct7802_read_fan_min(struct nct7802_data *data, u8 reg_fan_low, ret = 0; else if (ret) ret = DIV_ROUND_CLOSEST(1350000U, ret); + else + ret = 1350000U; abort: mutex_unlock(&data->access_lock); return ret; } static int nct7802_write_fan_min(struct nct7802_data *data, u8 reg_fan_low, - u8 reg_fan_high, unsigned int limit) + u8 reg_fan_high, unsigned long limit) { int err; @@ -326,8 +328,8 @@ static int nct7802_write_voltage(struct nct7802_data *data, int nr, int index, int shift = 8 - REG_VOLTAGE_LIMIT_MSB_SHIFT[index - 1][nr]; int err; + voltage = clamp_val(voltage, 0, 0x3ff * nct7802_vmul[nr]); voltage = DIV_ROUND_CLOSEST(voltage, nct7802_vmul[nr]); - voltage = clamp_val(voltage, 0, 0x3ff); mutex_lock(&data->access_lock); err = regmap_write(data->regmap, @@ -402,7 +404,7 @@ static ssize_t store_temp(struct device *dev, struct device_attribute *attr, if (err < 0) return err; - val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), -128, 127); + val = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000); err = regmap_write(data->regmap, nr, val & 0xff); return err ? : count; -- GitLab From 2aca9a4fe104e6e8d7ddca7042fe6d5379ead7f9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 11 Dec 2016 13:27:42 -0800 Subject: [PATCH 0313/1442] hwmon: (g762) Fix overflows and crash seen when writing limit attributes commit 4fccd4a1e8944033bcd7693ea4e8fb478cd2059a upstream. Fix overflows seen when writing into fan speed limit attributes. Also fix crash due to division by zero, seen when certain very large values (such as 2147483648, or 0x80000000) are written into fan speed limit attributes. Fixes: 594fbe713bf60 ("Add support for GMT G762/G763 PWM fan controllers") Cc: Arnaud Ebalard Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/g762.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/g762.c b/drivers/hwmon/g762.c index b96a2a9e4df7..628be9c95ff9 100644 --- a/drivers/hwmon/g762.c +++ b/drivers/hwmon/g762.c @@ -193,14 +193,17 @@ static inline unsigned int rpm_from_cnt(u8 cnt, u32 clk_freq, u16 p, * Convert fan RPM value from sysfs into count value for fan controller * register (FAN_SET_CNT). */ -static inline unsigned char cnt_from_rpm(u32 rpm, u32 clk_freq, u16 p, +static inline unsigned char cnt_from_rpm(unsigned long rpm, u32 clk_freq, u16 p, u8 clk_div, u8 gear_mult) { - if (!rpm) /* to stop the fan, set cnt to 255 */ + unsigned long f1 = clk_freq * 30 * gear_mult; + unsigned long f2 = p * clk_div; + + if (!rpm) /* to stop the fan, set cnt to 255 */ return 0xff; - return clamp_val(((clk_freq * 30 * gear_mult) / (rpm * p * clk_div)), - 0, 255); + rpm = clamp_val(rpm, f1 / (255 * f2), ULONG_MAX / f2); + return DIV_ROUND_CLOSEST(f1, rpm * f2); } /* helper to grab and cache data, at most one time per second */ -- GitLab From 584cb7dd15a3779a8f9a4ac2ec117f1f73bda83d Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 2 Jan 2017 17:53:39 +0100 Subject: [PATCH 0314/1442] hwmon: (lm90) fix temp1_max_alarm attribute commit e9572fdd13e299cfba03abbfd2786c84ac055249 upstream. Since commit commit eb1c8f4325d5 ("hwmon: (lm90) Convert to use new hwmon registration API") the temp1_max_alarm and temp1_crit_alarm attributes are mapped to the same alarm bit. Fix the typo. Fixes: eb1c8f4325d5 ("hwmon: (lm90) Convert to use new hwmon registration API") Signed-off-by: Micehael Walle Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/lm90.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 322ed9272811..841f2428e84a 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1036,7 +1036,7 @@ static const u8 lm90_temp_emerg_index[3] = { }; static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 }; -static const u8 lm90_max_alarm_bits[3] = { 0, 4, 12 }; +static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 }; static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 }; static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 }; static const u8 lm90_fault_bits[3] = { 0, 2, 10 }; -- GitLab From 447433e5f804b1a73beb9b1051b9f847541442f0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Nov 2016 17:23:22 -0800 Subject: [PATCH 0315/1442] Input: synaptics-rmi4 - unlock on error commit 792f497b22afd0563b94dd8fa129a05f762a2c25 upstream. We should unlock before returning on this error path. Fixes: 3a762dbd5347 ('[media] Input: synaptics-rmi4 - add support for F54 diagnostics') Signed-off-by: Dan Carpenter Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/rmi4/rmi_f54.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index cf805b960866..2e934aef3d2a 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -200,7 +200,7 @@ static int rmi_f54_request_report(struct rmi_function *fn, u8 report_type) error = rmi_write(rmi_dev, fn->fd.command_base_addr, F54_GET_REPORT); if (error < 0) - return error; + goto unlock; init_completion(&f54->cmd_done); @@ -209,9 +209,10 @@ static int rmi_f54_request_report(struct rmi_function *fn, u8 report_type) queue_delayed_work(f54->workqueue, &f54->work, 0); +unlock: mutex_unlock(&f54->data_mutex); - return 0; + return error; } static size_t rmi_f54_get_report_size(struct f54_data *f54) -- GitLab From e3b665ef41106381a7ad3e6bbdf2ebba7682cc25 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 9 Nov 2016 17:08:28 -0800 Subject: [PATCH 0316/1442] clk: qcom: ipq806x: Fix board clk rates commit cbf2e548ca8ad4bb274d014e9a70bd841d29948e upstream. The clocks on these boards run at 25 MHz, not 19.2 and 27 like other platforms. Unfortunately I copy/pasted from other similar SoCs but forgot this one is different. Fix it. Fixes: a085f877a882 ("clk: qcom: Move cxo/pxo/xo into dt files") Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/gcc-ipq806x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/gcc-ipq806x.c b/drivers/clk/qcom/gcc-ipq806x.c index 52a7d3959875..28eb200d0f1e 100644 --- a/drivers/clk/qcom/gcc-ipq806x.c +++ b/drivers/clk/qcom/gcc-ipq806x.c @@ -2990,11 +2990,11 @@ static int gcc_ipq806x_probe(struct platform_device *pdev) struct regmap *regmap; int ret; - ret = qcom_cc_register_board_clk(dev, "cxo_board", "cxo", 19200000); + ret = qcom_cc_register_board_clk(dev, "cxo_board", "cxo", 25000000); if (ret) return ret; - ret = qcom_cc_register_board_clk(dev, "pxo_board", "pxo", 27000000); + ret = qcom_cc_register_board_clk(dev, "pxo_board", "pxo", 25000000); if (ret) return ret; -- GitLab From 411873a0390e8378bb47e1d5e03fb0b95fd2efb4 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 1 Dec 2016 14:25:44 +0800 Subject: [PATCH 0317/1442] clk: clk-wm831x: fix a logic error commit 20979202ee6e4c68dab7bcf408787225a656d18e upstream. Fix bug https://bugzilla.kernel.org/show_bug.cgi?id=188561. Function wm831x_clkout_is_prepared() returns "true" when it fails to read CLOCK_CONTROL_1. "true" means the device is already prepared. So return "true" on the read failure seems improper. Signed-off-by: Pan Bian Acked-by: Charles Keepax Fixes: f05259a6ffa4 ("clk: wm831x: Add initial WM831x clock driver") Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-wm831x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-wm831x.c b/drivers/clk/clk-wm831x.c index f4fdac55727c..0621fbfb4beb 100644 --- a/drivers/clk/clk-wm831x.c +++ b/drivers/clk/clk-wm831x.c @@ -243,7 +243,7 @@ static int wm831x_clkout_is_prepared(struct clk_hw *hw) if (ret < 0) { dev_err(wm831x->dev, "Unable to read CLOCK_CONTROL_1: %d\n", ret); - return true; + return false; } return (ret & WM831X_CLKOUT_ENA) != 0; -- GitLab From 7af503c02b33504c82a88bc165e5ef77239faebd Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 29 Nov 2016 17:07:57 -0600 Subject: [PATCH 0318/1442] clk: ti: dra7: fix "failed to lookup clock node gmac_gmii_ref_clk_div" boot message commit f8d17344a60921c2387759fc0a85aa64299d1ec6 upstream. Prevent creating clk alias for non existing gmac_gmii_ref_clk_div clock and, this way, eliminate excessive error message during boot: "ti_dt_clocks_register: failed to lookup clock node gmac_gmii_ref_clk_div" Fixes: c097338ebd3f ("ARM: dts: dra7: cpsw: fix clocks tree") Signed-off-by: Grygorii Strashko Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/ti/clk-7xx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c index bfa17d33ef3b..9fd6043314eb 100644 --- a/drivers/clk/ti/clk-7xx.c +++ b/drivers/clk/ti/clk-7xx.c @@ -201,7 +201,6 @@ static struct ti_dt_clk dra7xx_clks[] = { DT_CLK(NULL, "atl_dpll_clk_mux", "atl_dpll_clk_mux"), DT_CLK(NULL, "atl_gfclk_mux", "atl_gfclk_mux"), DT_CLK(NULL, "dcan1_sys_clk_mux", "dcan1_sys_clk_mux"), - DT_CLK(NULL, "gmac_gmii_ref_clk_div", "gmac_gmii_ref_clk_div"), DT_CLK(NULL, "gmac_rft_clk_mux", "gmac_rft_clk_mux"), DT_CLK(NULL, "gpu_core_gclk_mux", "gpu_core_gclk_mux"), DT_CLK(NULL, "gpu_hyd_gclk_mux", "gpu_hyd_gclk_mux"), -- GitLab From 36a6f7005f38b86b938ab3b697ca61a6693eec1e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 11 Nov 2016 18:05:57 +0800 Subject: [PATCH 0319/1442] clk: sunxi-ng: sun8i-a23: Set CLK_SET_RATE_PARENT for audio module clocks commit 937ff9ded8b6ebe8963ade55bdd77a61ded88075 upstream. The audio module clocks are supposed to be set according to the sample rate of the audio stream. The audio PLL provides the clock signal for these module clocks, and only it is freely tunable. Set CLK_SET_RATE_PARENT for the audio module clocks so their users can properly tune the clock rate. Fixes: 5690879d93e8 ("clk: sunxi-ng: Add A23 CCU") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- drivers/clk/sunxi-ng/ccu-sun8i-a23.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c index 2646d980087b..5c6d37bdf247 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c @@ -344,10 +344,10 @@ static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4, static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x", "pll-audio-2x", "pll-audio" }; static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents, - 0x0b0, 16, 2, BIT(31), 0); + 0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_MUX_WITH_GATE(i2s1_clk, "i2s1", i2s_parents, - 0x0b4, 16, 2, BIT(31), 0); + 0x0b4, 16, 2, BIT(31), CLK_SET_RATE_PARENT); /* TODO: the parent for most of the USB clocks is not known */ static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", "osc24M", @@ -415,7 +415,7 @@ static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve", 0x13c, 16, 3, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(ac_dig_clk, "ac-dig", "pll-audio", - 0x140, BIT(31), 0); + 0x140, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(avs_clk, "avs", "osc24M", 0x144, BIT(31), 0); -- GitLab From 6c9f628468723e85fa59651e258bd462091692ad Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 11 Nov 2016 18:05:58 +0800 Subject: [PATCH 0320/1442] clk: sunxi-ng: sun8i-h3: Set CLK_SET_RATE_PARENT for audio module clocks commit 0f6f9302b819ca352cfd4f42c18ec08d521f9cae upstream. The audio module clocks are supposed to be set according to the sample rate of the audio stream. The audio PLL provides the clock signal for these module clocks, and only it is freely tunable. Set CLK_SET_RATE_PARENT for the audio module clocks so their users can properly tune the clock rate. Fixes: 0577e4853bfb ("clk: sunxi-ng: Add H3 clocks") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- drivers/clk/sunxi-ng/ccu-sun8i-h3.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 4d70590f05e3..21c427d86f28 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -394,16 +394,16 @@ static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4, static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x", "pll-audio-2x", "pll-audio" }; static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents, - 0x0b0, 16, 2, BIT(31), 0); + 0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_MUX_WITH_GATE(i2s1_clk, "i2s1", i2s_parents, - 0x0b4, 16, 2, BIT(31), 0); + 0x0b4, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_MUX_WITH_GATE(i2s2_clk, "i2s2", i2s_parents, - 0x0b8, 16, 2, BIT(31), 0); + 0x0b8, 16, 2, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_M_WITH_GATE(spdif_clk, "spdif", "pll-audio", - 0x0c0, 0, 4, BIT(31), 0); + 0x0c0, 0, 4, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", "osc24M", 0x0cc, BIT(8), 0); @@ -466,7 +466,7 @@ static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve", 0x13c, 16, 3, BIT(31), 0); static SUNXI_CCU_GATE(ac_dig_clk, "ac-dig", "pll-audio", - 0x140, BIT(31), 0); + 0x140, BIT(31), CLK_SET_RATE_PARENT); static SUNXI_CCU_GATE(avs_clk, "avs", "osc24M", 0x144, BIT(31), 0); -- GitLab From 5dd700e897e4d1f74fde0826ed2ed7db08c47327 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 26 Sep 2016 03:03:42 +0300 Subject: [PATCH 0321/1442] clk: imx31: fix rewritten input argument of mx31_clocks_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bae203d58b7dce89664071b3fafe20cedaa3e4f6 upstream. Function mx31_clocks_init() is called during clock intialization on legacy boards with reference clock frequency passed as its input argument, this can be verified by examination of the function declaration found in arch/arm/mach-imx/common.h and actual function users which include that header file. Inside CCF driver the function ignores its input argument, by chance the used value in the function body is the same as input arguments on side of all callers. Fixes: d9388c843237 ("clk: imx31: Do not call mxc_timer_init twice when booting with DT") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Uwe Kleine-König Acked-by: Stephen Boyd Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- drivers/clk/imx/clk-imx31.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/clk/imx/clk-imx31.c b/drivers/clk/imx/clk-imx31.c index 6a964144a5b5..6a49ba2b9671 100644 --- a/drivers/clk/imx/clk-imx31.c +++ b/drivers/clk/imx/clk-imx31.c @@ -157,10 +157,8 @@ static void __init _mx31_clocks_init(unsigned long fref) } } -int __init mx31_clocks_init(void) +int __init mx31_clocks_init(unsigned long fref) { - u32 fref = 26000000; /* default */ - _mx31_clocks_init(fref); clk_register_clkdev(clk[gpt_gate], "per", "imx-gpt.0"); -- GitLab From 54eed7ab1367f3d802ba516a60967e8547224a55 Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Thu, 15 Dec 2016 12:00:27 -0500 Subject: [PATCH 0322/1442] clk: renesas: mstp: Support 8-bit registers for r7s72100 commit e2a33c34ddff22ee208d80abdd12b88a98d6cb60 upstream. The RZ/A1 is different than the other Renesas SOCs because the MSTP registers are 8-bit instead of 32-bit and if you try writing values as 32-bit nothing happens...meaning this driver never worked for r7s72100. Fixes: b6face404f38 ("ARM: shmobile: r7s72100: add essential clock nodes to dtsi") Signed-off-by: Chris Brandt Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Kuninori Morimoto Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/renesas/clk-mstp.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c index 9375777776d9..b533f99550e1 100644 --- a/drivers/clk/renesas/clk-mstp.c +++ b/drivers/clk/renesas/clk-mstp.c @@ -37,12 +37,14 @@ * @smstpcr: module stop control register * @mstpsr: module stop status register (optional) * @lock: protects writes to SMSTPCR + * @width_8bit: registers are 8-bit, not 32-bit */ struct mstp_clock_group { struct clk_onecell_data data; void __iomem *smstpcr; void __iomem *mstpsr; spinlock_t lock; + bool width_8bit; }; /** @@ -59,6 +61,18 @@ struct mstp_clock { #define to_mstp_clock(_hw) container_of(_hw, struct mstp_clock, hw) +static inline u32 cpg_mstp_read(struct mstp_clock_group *group, + u32 __iomem *reg) +{ + return group->width_8bit ? readb(reg) : clk_readl(reg); +} + +static inline void cpg_mstp_write(struct mstp_clock_group *group, u32 val, + u32 __iomem *reg) +{ + group->width_8bit ? writeb(val, reg) : clk_writel(val, reg); +} + static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) { struct mstp_clock *clock = to_mstp_clock(hw); @@ -70,12 +84,12 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) spin_lock_irqsave(&group->lock, flags); - value = clk_readl(group->smstpcr); + value = cpg_mstp_read(group, group->smstpcr); if (enable) value &= ~bitmask; else value |= bitmask; - clk_writel(value, group->smstpcr); + cpg_mstp_write(group, value, group->smstpcr); spin_unlock_irqrestore(&group->lock, flags); @@ -83,7 +97,7 @@ static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable) return 0; for (i = 1000; i > 0; --i) { - if (!(clk_readl(group->mstpsr) & bitmask)) + if (!(cpg_mstp_read(group, group->mstpsr) & bitmask)) break; cpu_relax(); } @@ -114,9 +128,9 @@ static int cpg_mstp_clock_is_enabled(struct clk_hw *hw) u32 value; if (group->mstpsr) - value = clk_readl(group->mstpsr); + value = cpg_mstp_read(group, group->mstpsr); else - value = clk_readl(group->smstpcr); + value = cpg_mstp_read(group, group->smstpcr); return !(value & BIT(clock->bit_index)); } @@ -188,6 +202,9 @@ static void __init cpg_mstp_clocks_init(struct device_node *np) return; } + if (of_device_is_compatible(np, "renesas,r7s72100-mstp-clocks")) + group->width_8bit = true; + for (i = 0; i < MSTP_MAX_CLOCKS; ++i) clks[i] = ERR_PTR(-ENOENT); -- GitLab From 48ffae87e91346382790af10d793cba4d2e1d341 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Nov 2016 14:05:44 +0300 Subject: [PATCH 0323/1442] iommu/amd: Missing error code in amd_iommu_init_device() commit 24c790fbf5d8f54c8c82979db11edea8855b74bf upstream. We should set "ret" to -EINVAL if iommu_group_get() fails. Fixes: 55c99a4dc50f ("iommu/amd: Use iommu_attach_group()") Signed-off-by: Dan Carpenter Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu_v2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 594849a3a9be..f8ed8c95b685 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -805,8 +805,10 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) goto out_free_domain; group = iommu_group_get(&pdev->dev); - if (!group) + if (!group) { + ret = -EINVAL; goto out_free_domain; + } ret = iommu_attach_group(dev_state->domain, group); if (ret != 0) -- GitLab From 2148835de3c2410bfb6c64037187a1b72f6d6488 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 12 Dec 2016 07:28:26 -0500 Subject: [PATCH 0324/1442] iommu/amd: Fix the left value check of cmd buffer commit 432abf68a79332282329286d190e21fe3ac02a31 upstream. The generic command buffer entry is 128 bits (16 bytes), so the offset of tail and head pointer should be 16 bytes aligned and increased with 0x10 per command. When cmd buf is full, head = (tail + 0x10) % CMD_BUFFER_SIZE. So when left space of cmd buf should be able to store only two command, we should be issued one COMPLETE_WAIT additionally to wait all older commands completed. Then the left space should be increased after IOMMU fetching from cmd buf. So left check value should be left <= 0x20 (two commands). Signed-off-by: Huang Rui Fixes: ac0ea6e92b222 ('x86/amd-iommu: Improve handling of full command buffer') Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 754595ee11b6..11a13b5be73a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1021,7 +1021,7 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; left = (head - next_tail) % CMD_BUFFER_SIZE; - if (left <= 2) { + if (left <= 0x20) { struct iommu_cmd sync_cmd; int ret; -- GitLab From ef41459ab279edd5fa11da4efac4aa0155c06a35 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 6 Dec 2016 10:14:23 -0800 Subject: [PATCH 0325/1442] iommu/vt-d: Fix pasid table size encoding commit 65ca7f5f7d1cdde6c25172fe6107cd16902f826f upstream. Different encodings are used to represent supported PASID bits and number of PASID table entries. The current code assigns ecap_pss directly to extended context table entry PTS which is wrong and could result in writing non-zero bits to the reserved fields. IOMMU fault reason 11 will be reported when reserved bits are nonzero. This patch converts ecap_pss to extend context entry pts encoding based on VT-d spec. Chapter 9.4 as follows: - number of PASID bits = ecap_pss + 1 - number of PASID table entries = 2^(pts + 5) Software assigned limit of pasid_max value is also respected to match the allocation limitation of PASID table. cc: Mika Kuoppala cc: Ashok Raj Signed-off-by: Jacob Pan Tested-by: Mika Kuoppala Fixes: 2f26e0a9c9860 ('iommu/vt-d: Add basic SVM PASID support') Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d8376c2d18b3..96566d504d2a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5197,6 +5197,25 @@ static void intel_iommu_remove_device(struct device *dev) } #ifdef CONFIG_INTEL_IOMMU_SVM +#define MAX_NR_PASID_BITS (20) +static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu) +{ + /* + * Convert ecap_pss to extend context entry pts encoding, also + * respect the soft pasid_max value set by the iommu. + * - number of PASID bits = ecap_pss + 1 + * - number of PASID table entries = 2^(pts + 5) + * Therefore, pts = ecap_pss - 4 + * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15 + */ + if (ecap_pss(iommu->ecap) < 5) + return 0; + + /* pasid_max is encoded as actual number of entries not the bits */ + return find_first_bit((unsigned long *)&iommu->pasid_max, + MAX_NR_PASID_BITS) - 5; +} + int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) { struct device_domain_info *info; @@ -5229,7 +5248,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd if (!(ctx_lo & CONTEXT_PASIDE)) { context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); - context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap); + context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | + intel_iommu_get_pts(iommu); + wmb(); /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both * extended to permit requests-with-PASID if the PASIDE bit -- GitLab From afd7e2b4258aa4b01720a2d65846030b3a06689f Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Mon, 5 Dec 2016 20:09:07 +0800 Subject: [PATCH 0326/1442] iommu/vt-d: Flush old iommu caches for kdump when the device gets context mapped commit aec0e86172a79eb5e44aff1055bb953fe4d47c59 upstream. We met the DMAR fault both on hpsa P420i and P421 SmartArray controllers under kdump, it can be steadily reproduced on several different machines, the dmesg log is like: HP HPSA Driver (v 3.4.16-0) hpsa 0000:02:00.0: using doorbell to reset controller hpsa 0000:02:00.0: board ready after hard reset. hpsa 0000:02:00.0: Waiting for controller to respond to no-op DMAR: Setting identity map for device 0000:02:00.0 [0xe8000 - 0xe8fff] DMAR: Setting identity map for device 0000:02:00.0 [0xf4000 - 0xf4fff] DMAR: Setting identity map for device 0000:02:00.0 [0xbdf6e000 - 0xbdf6efff] DMAR: Setting identity map for device 0000:02:00.0 [0xbdf6f000 - 0xbdf7efff] DMAR: Setting identity map for device 0000:02:00.0 [0xbdf7f000 - 0xbdf82fff] DMAR: Setting identity map for device 0000:02:00.0 [0xbdf83000 - 0xbdf84fff] DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Read] Request device [02:00.0] fault addr fffff000 [fault reason 06] PTE Read access is not set hpsa 0000:02:00.0: controller message 03:00 timed out hpsa 0000:02:00.0: no-op failed; re-trying After some debugging, we found that the fault addr is from DMA initiated at the driver probe stage after reset(not in-flight DMA), and the corresponding pte entry value is correct, the fault is likely due to the old iommu caches of the in-flight DMA before it. Thus we need to flush the old cache after context mapping is setup for the device, where the device is supposed to finish reset at its driver probe stage and no in-flight DMA exists hereafter. I'm not sure if the hardware is responsible for invalidating all the related caches allocated in the iommu hardware before, but seems not the case for hpsa, actually many device drivers have problems in properly resetting the hardware. Anyway flushing (again) by software in kdump kernel when the device gets context mapped which is a quite infrequent operation does little harm. With this patch, the problematic machine can survive the kdump tests. CC: Myron Stowe CC: Joseph Szczypek CC: Don Brace CC: Baoquan He CC: Dave Young Fixes: 091d42e43d21 ("iommu/vt-d: Copy translation tables from old kernel") Fixes: dbcd861f252d ("iommu/vt-d: Do not re-use domain-ids from the old kernel") Fixes: cf484d0e6939 ("iommu/vt-d: Mark copied context entries") Signed-off-by: Xunlei Pang Tested-by: Don Brace Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 96566d504d2a..d82637ab09fd 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2037,6 +2037,25 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_present(context)) goto out_unlock; + /* + * For kdump cases, old valid entries may be cached due to the + * in-flight DMA and copied pgtable, but there is no unmapping + * behaviour for them, thus we need an explicit cache flush for + * the newly-mapped device. For kdump, at this point, the device + * is supposed to finish reset at its driver probe stage, so no + * in-flight DMA will exist, and we don't need to worry anymore + * hereafter. + */ + if (context_copied(context)) { + u16 did_old = context_domain_id(context); + + if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) + iommu->flush.flush_context(iommu, did_old, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + } + pgd = domain->pgd; context_clear_entry(context); -- GitLab From 311742c40da94fed4e4492c38ed456963ed4ecd6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 8 Nov 2016 14:38:52 +0100 Subject: [PATCH 0327/1442] ASoC: lpass-platform: initialize dma channel number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3b89e4b77ef9c2f985964fab17032db98f074ed0 upstream. A bugfix accidentally removed the implicit initialization of the dma channel number, causing undefined behavior when v->alloc_dma_channel is NULL: sound/soc/qcom/lpass-platform.c: In function ‘lpass_platform_pcmops_open’: sound/soc/qcom/lpass-platform.c:83:29: error: ‘dma_ch’ may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds back an explicit initialization to zero, restoring the previous behavior for that case. Fixes: 022d00ee0b55 ("ASoC: lpass-platform: Fix broken pcm data usage") Signed-off-by: Arnd Bergmann Acked-by: Kenneth Westfield Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/lpass-platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index b392e51de94d..420d200f9a05 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -78,6 +78,9 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) dma_ch = 0; if (v->alloc_dma_channel) dma_ch = v->alloc_dma_channel(drvdata, dir); + else + dma_ch = 0; + if (dma_ch < 0) return dma_ch; -- GitLab From d61a969f0e941de5ec682d6236cc9d5fa54bf85b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Nov 2016 18:11:08 +0100 Subject: [PATCH 0328/1442] ASoC: cht_bsw_rt5645: Fix leftover kmalloc commit a823a17981a73faa115bc0f7eda0190763075e2c upstream. cht_bsw_rt5645 driver allocates the own codec_id string but doesn't release it. For simplicity, put the string in cht_mc_private; then the string is allocated in a shot and released altogether. Fixes: c8560b7c917f ("ASoC: cht_bsw_rt5645: Fix writing to string literal") Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/boards/cht_bsw_rt5645.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index 56056ed7fcfd..16c94c45ce50 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -44,6 +44,7 @@ struct cht_acpi_card { struct cht_mc_private { struct snd_soc_jack jack; struct cht_acpi_card *acpi_card; + char codec_name[16]; }; static inline struct snd_soc_dai *cht_get_codec_dai(struct snd_soc_card *card) @@ -354,7 +355,6 @@ static int snd_cht_mc_probe(struct platform_device *pdev) int i; struct cht_mc_private *drv; struct snd_soc_card *card = snd_soc_cards[0].soc_card; - char codec_name[16]; struct sst_acpi_mach *mach; const char *i2c_name = NULL; int dai_index = 0; @@ -374,12 +374,12 @@ static int snd_cht_mc_probe(struct platform_device *pdev) } card->dev = &pdev->dev; mach = card->dev->platform_data; - sprintf(codec_name, "i2c-%s:00", drv->acpi_card->codec_id); + sprintf(drv->codec_name, "i2c-%s:00", drv->acpi_card->codec_id); /* set correct codec name */ for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) if (!strcmp(card->dai_link[i].codec_name, "i2c-10EC5645:00")) { - card->dai_link[i].codec_name = kstrdup(codec_name, GFP_KERNEL); + card->dai_link[i].codec_name = drv->codec_name; dai_index = i; } -- GitLab From 574bac4f402239d8dc5cb15952a93eba0602c646 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Oct 2016 11:55:48 +0300 Subject: [PATCH 0329/1442] ASoC: Intel: Skylake: Fix a shift wrapping bug commit c8eabf821cac120afb78ca251b07cbf520406a7e upstream. "*val" is a u64. It definitely looks like we intend to use the high 32 bits as well. Fixes: 700a9a63f9c1 ("ASoC: Intel: Skylake: Add module instance id generation APIs") Signed-off-by: Dan Carpenter Acked-by: Vinod Koul Tested-by: Kranthi G Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/skylake/skl-sst-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c index 8dc03039b311..ea162fbf68e5 100644 --- a/sound/soc/intel/skylake/skl-sst-utils.c +++ b/sound/soc/intel/skylake/skl-sst-utils.c @@ -179,7 +179,7 @@ static inline int skl_getid_32(struct uuid_module *module, u64 *val, index = ffz(mask_val); pvt_id = index + word1_mask + word2_mask; if (pvt_id <= (max_inst - 1)) { - *val |= 1 << (index + word1_mask); + *val |= 1ULL << (index + word1_mask); return pvt_id; } } -- GitLab From d451b3cc89c7f0c037281272d70539cd612ba539 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 21 Oct 2016 14:18:48 +0100 Subject: [PATCH 0330/1442] ASoC: samsung: i2s: Fixup last IRQ unsafe spin lock call commit 5faf071d08ddd1c1be66deaa93a09ccf43f5b538 upstream. Unfortunately, I seem to have missed a case where an IRQ safe spinlock was required, in samsung_i2s_dai_remove, when I fixed up the other calls in this patch: 316fa9e09ad7 ("ASoC: samsung: Use IRQ safe spin lock calls") This causes a lockdep warning when unbinding and rebinding the audio card: [ 104.357664] CPU0 CPU1 [ 104.362174] ---- ---- [ 104.366692] lock(&(&pri_dai->spinlock)->rlock); [ 104.371372] local_irq_disable(); [ 104.377283] lock(&(&substream->self_group.lock)->rlock); [ 104.385259] lock(&(&pri_dai->spinlock)->rlock); [ 104.392469] [ 104.395072] lock(&(&substream->self_group.lock)->rlock); [ 104.400710] [ 104.400710] *** DEADLOCK *** Fixes: ce8bcdbb61d9 ("ASoC: samsung: i2s: Protect more registers with a spinlock") Signed-off-by: Charles Keepax Reviewed-by: Krzysztof Kozlowski Reviewed-by: Sylwester Nawrocki Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/samsung/i2s.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 7825bff45ae3..85324e61cbd5 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1029,12 +1029,13 @@ static int samsung_i2s_dai_probe(struct snd_soc_dai *dai) static int samsung_i2s_dai_remove(struct snd_soc_dai *dai) { struct i2s_dai *i2s = snd_soc_dai_get_drvdata(dai); + unsigned long flags; if (!is_secondary(i2s)) { if (i2s->quirks & QUIRK_NEED_RSTCLR) { - spin_lock(i2s->lock); + spin_lock_irqsave(i2s->lock, flags); writel(0, i2s->addr + I2SCON); - spin_unlock(i2s->lock); + spin_unlock_irqrestore(i2s->lock, flags); } } -- GitLab From 5157e98aa024c5ef3d5c3b27cb7203f34073ed26 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Fri, 11 Nov 2016 10:00:20 +1100 Subject: [PATCH 0331/1442] scsi: g_NCR5380: Fix release_region in error handling commit 7b93ca43b7e21fbe6fb1a6f4ecce4a2f70f424a0 upstream. When a SW-configurable card is specified but not found, the driver releases wrong region, causing the following message in kernel log: Trying to free nonexistent resource <0000000000000000-000000000000000f> Fix it by assigning base earlier. Signed-off-by: Ondrej Zary Fixes: a8cfbcaec0c1 ("scsi: g_NCR5380: Stop using scsi_module.c") Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/g_NCR5380.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c index cbf010324c18..596a75924d90 100644 --- a/drivers/scsi/g_NCR5380.c +++ b/drivers/scsi/g_NCR5380.c @@ -170,12 +170,12 @@ static int generic_NCR5380_init_one(struct scsi_host_template *tpnt, if (ports[i]) { /* At this point we have our region reserved */ magic_configure(i, 0, magic); /* no IRQ yet */ - outb(0xc0, ports[i] + 9); - if (inb(ports[i] + 9) != 0x80) { + base = ports[i]; + outb(0xc0, base + 9); + if (inb(base + 9) != 0x80) { ret = -ENODEV; goto out_release; } - base = ports[i]; port_idx = i; } else return -EINVAL; -- GitLab From 962a12f8e523b984d77cb5063a722a8d9536cffc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Nov 2016 16:08:34 +0100 Subject: [PATCH 0332/1442] scsi: mvsas: fix command_active typo commit af15769ffab13d777e55fdef09d0762bf0c249c4 upstream. gcc-7 notices that the condition in mvs_94xx_command_active looks suspicious: drivers/scsi/mvsas/mv_94xx.c: In function 'mvs_94xx_command_active': drivers/scsi/mvsas/mv_94xx.c:671:15: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context] This was introduced when the mv_printk() statement got added, and leads to the condition being ignored. This is probably harmless. Changing '&&' to '&' makes the code look reasonable, as we check the command bit before setting and printing it. Fixes: a4632aae8b66 ("[SCSI] mvsas: Add new macros and functions") Signed-off-by: Arnd Bergmann Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mvsas/mv_94xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mvsas/mv_94xx.c b/drivers/scsi/mvsas/mv_94xx.c index 4c57d9abce7b..7de5d8d75480 100644 --- a/drivers/scsi/mvsas/mv_94xx.c +++ b/drivers/scsi/mvsas/mv_94xx.c @@ -668,7 +668,7 @@ static void mvs_94xx_command_active(struct mvs_info *mvi, u32 slot_idx) { u32 tmp; tmp = mvs_cr32(mvi, MVS_COMMAND_ACTIVE+(slot_idx >> 3)); - if (tmp && 1 << (slot_idx % 32)) { + if (tmp & 1 << (slot_idx % 32)) { mv_printk("command active %08X, slot [%x].\n", tmp, slot_idx); mvs_cw32(mvi, MVS_COMMAND_ACTIVE + (slot_idx >> 3), 1 << (slot_idx % 32)); -- GitLab From 0e79a6b4567704970076a5b933bc1736234610ec Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 13 Dec 2016 15:27:04 +0300 Subject: [PATCH 0333/1442] target/iscsi: Fix double free in lio_target_tiqn_addtpg() commit a91918cd3ea11f91c68e08e1e8ce1b560447a80e upstream. This iscsit_tpg_add_portal_group() function is only called from lio_target_tiqn_addtpg(). Both functions free the "tpg" pointer on error so it's a double free bug. The memory is allocated in the caller so it should be freed in the caller and not here. Fixes: e48354ce078c ("iscsi-target: Add iSCSI fabric support for target v4.1") Signed-off-by: Dan Carpenter Reviewed-by: David Disseldorp [ bvanassche: Added "Fix" at start of patch title ] Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_tpg.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 0814e5894a96..205a509b0dfb 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -260,7 +260,6 @@ int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_gro iscsi_release_param_list(tpg->param_list); tpg->param_list = NULL; } - kfree(tpg); return -ENOMEM; } -- GitLab From 3dd50a5e2be97d1c68545a44851be97294bfdcc7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Nov 2016 15:40:31 -0800 Subject: [PATCH 0334/1442] sbp-target: Fix second argument of percpu_ida_alloc() commit 8456066a57940b3884aa080c58b166567dc9de39 upstream. Pass a task state as second argument to percpu_ida_alloc(). Fixes: commit 5a3ee221b543 ("sbp-target: Conversion to percpu_ida tag pre-allocation") Signed-off-by: Bart Van Assche Cc: Chris Boot Signed-off-by: Greg Kroah-Hartman --- drivers/target/sbp/sbp_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 58bb6ed18185..6ca388eca33b 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -928,7 +928,7 @@ static struct sbp_target_request *sbp_mgt_get_req(struct sbp_session *sess, struct sbp_target_request *req; int tag; - tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING); if (tag < 0) return ERR_PTR(-ENOMEM); -- GitLab From c4c11b4bdbf684540f9baf9f17fc1c7fb688f1fb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Dec 2016 15:05:38 -0800 Subject: [PATCH 0335/1442] relay: check array offset before using it commit 9a29d0fbc2d9ad99fb8a981ab72548cc360e9d4c upstream. Smatch complains that we started using the array offset before we checked that it was valid. Fixes: 017c59c042d0 ('relay: Use per CPU constructs for the relay channel buffer pointers') Link: http://lkml.kernel.org/r/20161013084947.GC16198@mwanda Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/relay.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/relay.c b/kernel/relay.c index da79a109dbeb..8f18d314a96a 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -809,11 +809,11 @@ void relay_subbufs_consumed(struct rchan *chan, { struct rchan_buf *buf; - if (!chan) + if (!chan || cpu >= NR_CPUS) return; buf = *per_cpu_ptr(chan->buf, cpu); - if (cpu >= NR_CPUS || !buf || subbufs_consumed > chan->n_subbufs) + if (!buf || subbufs_consumed > chan->n_subbufs) return; if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed) -- GitLab From 825e6a0f7c51cc2c7e56604d2ea89c86aebef914 Mon Sep 17 00:00:00 2001 From: Eric Richter Date: Thu, 13 Oct 2016 17:47:36 -0500 Subject: [PATCH 0336/1442] ima: fix memory leak in ima_release_policy commit 9a11a18902bc3b904353063763d06480620245a6 upstream. When the "policy" securityfs file is opened for read, it is opened as a sequential file. However, when it is eventually released, there is no cleanup for the sequential file, therefore some memory is leaked. This patch adds a call to seq_release() in ima_release_policy() to clean up the memory when the file is opened for read. Fixes: 80eae209d63a IMA: allow reading back the current policy Reported-by: Colin Ian King Signed-off-by: Eric Richter Tested-by: Colin Ian King Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index c07a3844ea0a..3df46906492d 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -401,7 +401,7 @@ static int ima_release_policy(struct inode *inode, struct file *file) const char *cause = valid_policy ? "completed" : "failed"; if ((file->f_flags & O_ACCMODE) == O_RDONLY) - return 0; + return seq_release(inode, file); if (valid_policy && ima_check_policy() < 0) { cause = "failed"; -- GitLab From 5cbea795f4d13184ff42307daea5a7dc7c046e5f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 8 Nov 2016 00:43:54 -0700 Subject: [PATCH 0337/1442] PCI/MSI: Check for NULL affinity mask in pci_irq_get_affinity() commit d1d111e073840b8dbc1ae90ba3fc274736451bdc upstream. If msi_setup_entry() fails to allocate an affinity mask, it logs a message but continues on and allocates an MSI entry with entry->affinity == NULL. Check for this case in pci_irq_get_affinity() so we don't try to dereference a NULL pointer. [bhelgaas: changelog] Fixes: ee8d41e53efe "pci/msi: Retrieve affinity for a vector" Signed-off-by: Jan Beulich Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig CC: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index ad70507cfb56..3455f752d5e4 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1294,7 +1294,8 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) } else if (dev->msi_enabled) { struct msi_desc *entry = first_pci_msi_entry(dev); - if (WARN_ON_ONCE(!entry || nr >= entry->nvec_used)) + if (WARN_ON_ONCE(!entry || !entry->affinity || + nr >= entry->nvec_used)) return NULL; return &entry->affinity[nr]; -- GitLab From 3b198ddd5855a5073bac27571394963b87d26613 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 31 Oct 2016 14:17:35 -0700 Subject: [PATCH 0338/1442] irqchip/bcm7038-l1: Implement irq_cpu_offline() callback commit 34c535793bcbf9263cf22f8a52101f796cdfab8e upstream. We did not implement an irq_cpu_offline callback for our irqchip, yet we support setting a given IRQ's affinity. This resulted in interrupts whose affinity mask included CPUs being taken offline not to work correctly once the CPU had been put offline. Fixes: 5f7f0317ed28 ("IRQCHIP: Add new driver for BCM7038-style level 1 interrupt controllers") Signed-off-by: Florian Fainelli Cc: linux-mips@linux-mips.org Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Cc: cernekee@gmail.com Cc: jaedon.shin@gmail.com Cc: ralf@linux-mips.org Cc: justinpopo6@gmail.com Link: http://lkml.kernel.org/r/1477948656-12966-2-git-send-email-f.fainelli@gmail.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-bcm7038-l1.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index 353c54986211..c2662a1bfdd3 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -215,6 +215,31 @@ static int bcm7038_l1_set_affinity(struct irq_data *d, return 0; } +static void bcm7038_l1_cpu_offline(struct irq_data *d) +{ + struct cpumask *mask = irq_data_get_affinity_mask(d); + int cpu = smp_processor_id(); + cpumask_t new_affinity; + + /* This CPU was not on the affinity mask */ + if (!cpumask_test_cpu(cpu, mask)) + return; + + if (cpumask_weight(mask) > 1) { + /* + * Multiple CPU affinity, remove this CPU from the affinity + * mask + */ + cpumask_copy(&new_affinity, mask); + cpumask_clear_cpu(cpu, &new_affinity); + } else { + /* Only CPU, put on the lowest online CPU */ + cpumask_clear(&new_affinity); + cpumask_set_cpu(cpumask_first(cpu_online_mask), &new_affinity); + } + irq_set_affinity_locked(d, &new_affinity, false); +} + static int __init bcm7038_l1_init_one(struct device_node *dn, unsigned int idx, struct bcm7038_l1_chip *intc) @@ -266,6 +291,7 @@ static struct irq_chip bcm7038_l1_irq_chip = { .irq_mask = bcm7038_l1_mask, .irq_unmask = bcm7038_l1_unmask, .irq_set_affinity = bcm7038_l1_set_affinity, + .irq_cpu_offline = bcm7038_l1_cpu_offline, }; static int bcm7038_l1_map(struct irq_domain *d, unsigned int virq, -- GitLab From 65f796837e0051e11334b0c075b153d5d3d4d2c5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 5 Dec 2016 16:38:16 -0800 Subject: [PATCH 0339/1442] PM / wakeirq: Fix dedicated wakeirq for drivers not using autosuspend commit bed570307ed78f21b77cb04a1df781dee4a8f05a upstream. I noticed some wakeirq flakeyness with consumer drivers not using autosuspend. For drivers not using autosuspend, the wakeirq may never get unmasked in rpm_suspend() because of irq desc->depth. We are configuring dedicated wakeirqs to start with IRQ_NOAUTOEN as we naturally don't want them running until rpm_suspend() is called. However, when a consumer driver initially calls pm_runtime_get(), we now wrongly start with disable_irq_nosync() call on the dedicated wakeirq that is disabled to start with. This causes desc->depth to toggle between 1 and 2 instead of the usual 0 and 1. This can prevent enable_irq() from unmasking the wakeirq as that only happens at desc->depth 1. This does not necessarily show up with drivers using autosuspend as there is time for disable_irq_nosync() before rpm_suspend() gets called after the autosuspend timeout. Let's fix the issue by adding wirq->status that lazily gets set on the first rpm_suspend(). We also need PM runtime core private functions for dev_pm_enable_wake_irq_check() and dev_pm_disable_wake_irq_check() so we can enable the dedicated wakeirq on the first rpm_suspend(). While at it, let's also fix the comments for dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq(). Those can still be used by the consumer drivers as needed because the IRQ core manages the interrupt usecount for us. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Signed-off-by: Tony Lindgren Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/power.h | 19 ++++++++- drivers/base/power/runtime.c | 8 ++-- drivers/base/power/wakeirq.c | 76 +++++++++++++++++++++++++++++++----- 3 files changed, 88 insertions(+), 15 deletions(-) diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 50e30e7b059d..a84332aefc2d 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -21,14 +21,22 @@ extern void pm_runtime_init(struct device *dev); extern void pm_runtime_reinit(struct device *dev); extern void pm_runtime_remove(struct device *dev); +#define WAKE_IRQ_DEDICATED_ALLOCATED BIT(0) +#define WAKE_IRQ_DEDICATED_MANAGED BIT(1) +#define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \ + WAKE_IRQ_DEDICATED_MANAGED) + struct wake_irq { struct device *dev; + unsigned int status; int irq; - bool dedicated_irq:1; }; extern void dev_pm_arm_wake_irq(struct wake_irq *wirq); extern void dev_pm_disarm_wake_irq(struct wake_irq *wirq); +extern void dev_pm_enable_wake_irq_check(struct device *dev, + bool can_change_status); +extern void dev_pm_disable_wake_irq_check(struct device *dev); #ifdef CONFIG_PM_SLEEP @@ -104,6 +112,15 @@ static inline void dev_pm_disarm_wake_irq(struct wake_irq *wirq) { } +static inline void dev_pm_enable_wake_irq_check(struct device *dev, + bool can_change_status) +{ +} + +static inline void dev_pm_disable_wake_irq_check(struct device *dev) +{ +} + #endif #ifdef CONFIG_PM_SLEEP diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 82a081ea4317..23f3b95a1158 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -515,7 +515,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) callback = RPM_GET_CALLBACK(dev, runtime_suspend); - dev_pm_enable_wake_irq(dev); + dev_pm_enable_wake_irq_check(dev, true); retval = rpm_callback(callback, dev); if (retval) goto fail; @@ -554,7 +554,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) return retval; fail: - dev_pm_disable_wake_irq(dev); + dev_pm_disable_wake_irq_check(dev); __update_runtime_status(dev, RPM_ACTIVE); dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); @@ -737,12 +737,12 @@ static int rpm_resume(struct device *dev, int rpmflags) callback = RPM_GET_CALLBACK(dev, runtime_resume); - dev_pm_disable_wake_irq(dev); + dev_pm_disable_wake_irq_check(dev); retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_cancel_pending(dev); - dev_pm_enable_wake_irq(dev); + dev_pm_enable_wake_irq_check(dev, false); } else { no_callback: __update_runtime_status(dev, RPM_ACTIVE); diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 0d77cd6fd8d1..404d94c6c8bc 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -110,8 +110,10 @@ void dev_pm_clear_wake_irq(struct device *dev) dev->power.wakeirq = NULL; spin_unlock_irqrestore(&dev->power.lock, flags); - if (wirq->dedicated_irq) + if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED) { free_irq(wirq->irq, wirq); + wirq->status &= ~WAKE_IRQ_DEDICATED_MASK; + } kfree(wirq); } EXPORT_SYMBOL_GPL(dev_pm_clear_wake_irq); @@ -179,7 +181,6 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) wirq->dev = dev; wirq->irq = irq; - wirq->dedicated_irq = true; irq_set_status_flags(irq, IRQ_NOAUTOEN); /* @@ -195,6 +196,8 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) if (err) goto err_free_irq; + wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED; + return err; err_free_irq: @@ -210,9 +213,9 @@ EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq); * dev_pm_enable_wake_irq - Enable device wake-up interrupt * @dev: Device * - * Called from the bus code or the device driver for - * runtime_suspend() to enable the wake-up interrupt while - * the device is running. + * Optionally called from the bus code or the device driver for + * runtime_resume() to override the PM runtime core managed wake-up + * interrupt handling to enable the wake-up interrupt. * * Note that for runtime_suspend()) the wake-up interrupts * should be unconditionally enabled unlike for suspend() @@ -222,7 +225,7 @@ void dev_pm_enable_wake_irq(struct device *dev) { struct wake_irq *wirq = dev->power.wakeirq; - if (wirq && wirq->dedicated_irq) + if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)) enable_irq(wirq->irq); } EXPORT_SYMBOL_GPL(dev_pm_enable_wake_irq); @@ -231,19 +234,72 @@ EXPORT_SYMBOL_GPL(dev_pm_enable_wake_irq); * dev_pm_disable_wake_irq - Disable device wake-up interrupt * @dev: Device * - * Called from the bus code or the device driver for - * runtime_resume() to disable the wake-up interrupt while - * the device is running. + * Optionally called from the bus code or the device driver for + * runtime_suspend() to override the PM runtime core managed wake-up + * interrupt handling to disable the wake-up interrupt. */ void dev_pm_disable_wake_irq(struct device *dev) { struct wake_irq *wirq = dev->power.wakeirq; - if (wirq && wirq->dedicated_irq) + if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED)) disable_irq_nosync(wirq->irq); } EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq); +/** + * dev_pm_enable_wake_irq_check - Checks and enables wake-up interrupt + * @dev: Device + * @can_change_status: Can change wake-up interrupt status + * + * Enables wakeirq conditionally. We need to enable wake-up interrupt + * lazily on the first rpm_suspend(). This is needed as the consumer device + * starts in RPM_SUSPENDED state, and the the first pm_runtime_get() would + * otherwise try to disable already disabled wakeirq. The wake-up interrupt + * starts disabled with IRQ_NOAUTOEN set. + * + * Should be only called from rpm_suspend() and rpm_resume() path. + * Caller must hold &dev->power.lock to change wirq->status + */ +void dev_pm_enable_wake_irq_check(struct device *dev, + bool can_change_status) +{ + struct wake_irq *wirq = dev->power.wakeirq; + + if (!wirq || !((wirq->status & WAKE_IRQ_DEDICATED_MASK))) + return; + + if (likely(wirq->status & WAKE_IRQ_DEDICATED_MANAGED)) { + goto enable; + } else if (can_change_status) { + wirq->status |= WAKE_IRQ_DEDICATED_MANAGED; + goto enable; + } + + return; + +enable: + enable_irq(wirq->irq); +} + +/** + * dev_pm_disable_wake_irq_check - Checks and disables wake-up interrupt + * @dev: Device + * + * Disables wake-up interrupt conditionally based on status. + * Should be only called from rpm_suspend() and rpm_resume() path. + */ +void dev_pm_disable_wake_irq_check(struct device *dev) +{ + struct wake_irq *wirq = dev->power.wakeirq; + + if (!wirq || !((wirq->status & WAKE_IRQ_DEDICATED_MASK))) + return; + + if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) + disable_irq_nosync(wirq->irq); +} + /** * dev_pm_arm_wake_irq - Arm device wake-up * @wirq: Device wake-up interrupt -- GitLab From 74e365e69687bb250fa71ba5ba389cd2680d843f Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 14 Dec 2016 16:01:12 -0200 Subject: [PATCH 0340/1442] genirq/affinity: Fix node generation from cpumask commit c0af52437254fda8b0cdbaae5a9b6d9327f1fcd5 upstream. Commit 34c3d9819fda ("genirq/affinity: Provide smarter irq spreading infrastructure") introduced a better IRQ spreading mechanism, taking account of the available NUMA nodes in the machine. Problem is that the algorithm of retrieving the nodemask iterates "linearly" based on the number of online nodes - some architectures present non-linear node distribution among the nodemask, like PowerPC. If this is the case, the algorithm lead to a wrong node count number and therefore to a bad/incomplete IRQ affinity distribution. For example, this problem were found in a machine with 128 CPUs and two nodes, namely nodes 0 and 8 (instead of 0 and 1, if it was linearly distributed). This led to a wrong affinity distribution which then led to a bad mq allocation for nvme driver. Finally, we take the opportunity to fix a comment regarding the affinity distribution when we have _more_ nodes than vectors. Fixes: 34c3d9819fda ("genirq/affinity: Provide smarter irq spreading infrastructure") Reported-by: Gabriel Krisman Bertazi Signed-off-by: Guilherme G. Piccoli Reviewed-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Gavin Shan Cc: linux-pci@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: hch@lst.de Link: http://lkml.kernel.org/r/1481738472-2671-1-git-send-email-gpiccoli@linux.vnet.ibm.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/affinity.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 17f51d63da56..668f51b861f7 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -37,10 +37,10 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk) { - int n, nodes; + int n, nodes = 0; /* Calculate the number of nodes in the supplied affinity mask */ - for (n = 0, nodes = 0; n < num_online_nodes(); n++) { + for_each_online_node(n) { if (cpumask_intersects(mask, cpumask_of_node(n))) { node_set(n, *nodemsk); nodes++; @@ -81,7 +81,7 @@ struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, nodes = get_nodes_in_cpumask(affinity, &nodemsk); /* - * If the number of nodes in the mask is less than or equal the + * If the number of nodes in the mask is greater than or equal the * number of vectors we just spread the vectors across the nodes. */ if (nvec <= nodes) { -- GitLab From 1d8286ccbcc6f2f5abca386430825e4c3983edc7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Nov 2016 14:31:34 +0300 Subject: [PATCH 0341/1442] mmc: mmc_test: Uninitialized return value commit 16652a936e96f5dae53c3fbd38a570497baadaa8 upstream. We never set "ret" to RESULT_OK. Fixes: 9f9c4180f88d ("mmc: mmc_test: add test for non-blocking transfers") Signed-off-by: Dan Carpenter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/card/mmc_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index 3678220964fe..df382be62634 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -818,7 +818,7 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test, struct mmc_async_req *cur_areq = &test_areq[0].areq; struct mmc_async_req *other_areq = &test_areq[1].areq; int i; - int ret; + int ret = RESULT_OK; test_areq[0].test = test; test_areq[1].test = test; -- GitLab From 17df3e74fb5174bc9fc078fd640dc3b2762f0ca1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Dec 2016 16:59:55 -0800 Subject: [PATCH 0342/1442] rpmsg: qcom_smd: Correct return value for O_NONBLOCK commit 1d74e7ed5dc1903ac081574a9b6aa94e7ba4ad45 upstream. qcom_smd_send() should return -EAGAIN for non-blocking channels with insufficient space, so that we can propagate this event to user space. Fixes: 53e2822e56c7 ("rpmsg: Introduce Qualcomm SMD backend") Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/qcom_smd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 06fef2b4c814..1d4770c02e57 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -739,7 +739,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, while (qcom_smd_get_tx_avail(channel) < tlen) { if (!wait) { - ret = -ENOMEM; + ret = -EAGAIN; goto out; } -- GitLab From 66c6770379368ba0738a61c2317edc0ca324b902 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 12 Dec 2016 16:41:56 -0800 Subject: [PATCH 0343/1442] mm/hugetlb.c: use the right pte val for compare in hugetlb_cow commit 3999f52e3198e76607446ab1a4610c1ddc406c56 upstream. We cannot use the pte value used in set_pte_at for pte_same comparison, because archs like ppc64, filter/add new pte flag in set_pte_at. Instead fetch the pte value inside hugetlb_cow. We are comparing pte value to make sure the pte didn't change since we dropped the page table lock. hugetlb_cow get called with page table lock held, and we can take a copy of the pte value before we drop the page table lock. With hugetlbfs, we optimize the MAP_PRIVATE write fault path with no previous mapping (huge_pte_none entries), by forcing a cow in the fault path. This avoid take an addition fault to covert a read-only mapping to read/write. Here we were comparing a recently instantiated pte (via set_pte_at) to the pte values from linux page table. As explained above on ppc64 such pte_same check returned wrong result, resulting in us taking an additional fault on ppc64. Fixes: 6a119eae942c ("powerpc/mm: Add a _PAGE_PTE bit") Link: http://lkml.kernel.org/r/20161018154245.18023-1-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Reported-by: Jan Stancek Acked-by: Hillf Danton Cc: Mike Kravetz Cc: Scott Wood Cc: Michael Ellerman Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 418bf01a50ed..23aec01836aa 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3450,15 +3450,17 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * Keep the pte_same checks anyway to make transition from the mutex easier. */ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, pte_t pte, - struct page *pagecache_page, spinlock_t *ptl) + unsigned long address, pte_t *ptep, + struct page *pagecache_page, spinlock_t *ptl) { + pte_t pte; struct hstate *h = hstate_vma(vma); struct page *old_page, *new_page; int ret = 0, outside_reserve = 0; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ + pte = huge_ptep_get(ptep); old_page = pte_page(pte); retry_avoidcopy: @@ -3733,7 +3735,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ - ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl); + ret = hugetlb_cow(mm, vma, address, ptep, page, ptl); } spin_unlock(ptl); @@ -3888,8 +3890,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (flags & FAULT_FLAG_WRITE) { if (!huge_pte_write(entry)) { - ret = hugetlb_cow(mm, vma, address, ptep, entry, - pagecache_page, ptl); + ret = hugetlb_cow(mm, vma, address, ptep, + pagecache_page, ptl); goto out_put_page; } entry = huge_pte_mkdirty(entry); -- GitLab From cd6d9ffffc4c2ce545e9d8dfcc7fe562cb02b4ea Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 14 Nov 2016 14:32:27 -0200 Subject: [PATCH 0344/1442] docs-rst: fix LaTeX \DURole renewcommand with Sphinx 1.3+ commit e2a91f4f42018994d7424d405900d17eba6555d0 upstream. PDF build on Kernel 4.9-rc? returns an error with Sphinx 1.3.x and Sphinx 1.4.x, when trying to solve some cross-references. The solution is to redefine the \DURole macro. However, this is redefined too late. Move such redefinition to LaTeX preamble and bind it to just the Sphinx versions where the error is known to be present. Tested by building the documentation on interactive mode: make PDFLATEX=xelatex -C Documentation/output/./latex Fixes: e61a39baf74d ("[media] index.rst: Fix LaTeX error in interactive mode on Sphinx 1.4.x") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/conf.py | 6 +++++- Documentation/media/index.rst | 5 ----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index bf6f310e5170..d769cd89a9f7 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -37,7 +37,7 @@ from load_config import loadConfig extensions = ['kernel-doc', 'rstFlatTable', 'kernel_include', 'cdomain'] # The name of the math extension changed on Sphinx 1.4 -if minor > 3: +if major == 1 and minor > 3: extensions.append("sphinx.ext.imgmath") else: extensions.append("sphinx.ext.pngmath") @@ -332,6 +332,10 @@ latex_elements = { ''' } +# Fix reference escape troubles with Sphinx 1.4.x +if major == 1 and minor > 3: + latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n' + # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). diff --git a/Documentation/media/index.rst b/Documentation/media/index.rst index e347a3e7bdef..7f8f0af620ce 100644 --- a/Documentation/media/index.rst +++ b/Documentation/media/index.rst @@ -1,11 +1,6 @@ Linux Media Subsystem Documentation =================================== -.. Sphinx 1.4.x has a definition for DUrole that doesn't work on alltt blocks -.. raw:: latex - - \renewcommand*{\DUrole}[2]{ #2 } - Contents: .. toctree:: -- GitLab From 058a4a534c2392ce6a70ea8372b635de2af1d48f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 12 Dec 2016 16:43:32 -0800 Subject: [PATCH 0345/1442] mm: khugepaged: close use-after-free race during shmem collapsing commit 91a45f71078a6569ec3ca5bef74e1ab58121d80e upstream. Patch series "mm: workingset: radix tree subtleties & single-page file refaults", v3. This is another revision of the radix tree / workingset patches based on feedback from Jan and Kirill. This is a follow-up to d3798ae8c6f3 ("mm: filemap: don't plant shadow entries without radix tree node"). That patch fixed an issue that was caused mainly by the page cache sneaking special shadow page entries into the radix tree and relying on subtleties in the radix tree code to make that work. The fix also had to stop tracking refaults for single-page files because shadow pages stored as direct pointers in radix_tree_root->rnode weren't properly handled during tree extension. These patches make the radix tree code explicitely support and track such special entries, to eliminate the subtleties and to restore the thrash detection for single-page files. This patch (of 9): When a radix tree iteration drops the tree lock, another thread might swoop in and free the node holding the current slot. The iteration needs to do another tree lookup from the current index to continue. [kirill.shutemov@linux.intel.com: re-lookup for replacement] Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages") Link: http://lkml.kernel.org/r/20161117191138.22769-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Kirill A. Shutemov Reviewed-by: Jan Kara Cc: Hugh Dickins Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/khugepaged.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 87e1a7ca3846..2779c63bdea0 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1403,6 +1403,9 @@ static void collapse_shmem(struct mm_struct *mm, spin_lock_irq(&mapping->tree_lock); + slot = radix_tree_lookup_slot(&mapping->page_tree, index); + VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot, + &mapping->tree_lock), page); VM_BUG_ON_PAGE(page_mapped(page), page); /* @@ -1426,6 +1429,7 @@ static void collapse_shmem(struct mm_struct *mm, radix_tree_replace_slot(slot, new_page + (index % HPAGE_PMD_NR)); + slot = radix_tree_iter_next(&iter); index++; continue; out_lru: @@ -1537,6 +1541,7 @@ static void collapse_shmem(struct mm_struct *mm, putback_lru_page(page); unlock_page(page); spin_lock_irq(&mapping->tree_lock); + slot = radix_tree_iter_next(&iter); } VM_BUG_ON(nr_none); spin_unlock_irq(&mapping->tree_lock); -- GitLab From dc1b6d0aed97a5e7c95afd1a2eff20151f440513 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 12 Dec 2016 16:43:35 -0800 Subject: [PATCH 0346/1442] mm: khugepaged: fix radix tree node leak in shmem collapse error path commit 59749e6ce53735d8b696763742225f126e94603f upstream. The radix tree counts valid entries in each tree node. Entries stored in the tree cannot be removed by simpling storing NULL in the slot or the internal counters will be off and the node never gets freed again. When collapsing a shmem page fails, restore the holes that were filled with radix_tree_insert() with a proper radix tree deletion. Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages") Link: http://lkml.kernel.org/r/20161117191138.22769-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Jan Kara Acked-by: Kirill A. Shutemov Reviewed-by: Jan Kara Cc: Hugh Dickins Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/khugepaged.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 2779c63bdea0..5d7c006373d3 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1525,9 +1525,11 @@ static void collapse_shmem(struct mm_struct *mm, if (!page || iter.index < page->index) { if (!nr_none) break; - /* Put holes back where they were */ - radix_tree_replace_slot(slot, NULL); nr_none--; + /* Put holes back where they were */ + radix_tree_delete(&mapping->page_tree, + iter.index); + slot = radix_tree_iter_next(&iter); continue; } -- GitLab From 5d7d362abc408e69ee229459fba21f833f2f5bf1 Mon Sep 17 00:00:00 2001 From: Ming Ling Date: Mon, 12 Dec 2016 16:42:26 -0800 Subject: [PATCH 0347/1442] mm, compaction: fix NR_ISOLATED_* stats for pfn based migration commit 6afcf8ef0ca0a69d014f8edb613d94821f0ae700 upstream. Since commit bda807d44454 ("mm: migrate: support non-lru movable page migration") isolate_migratepages_block) can isolate !PageLRU pages which would acct_isolated account as NR_ISOLATED_*. Accounting these non-lru pages NR_ISOLATED_{ANON,FILE} doesn't make any sense and it can misguide heuristics based on those counters such as pgdat_reclaimable_pages resp. too_many_isolated which would lead to unexpected stalls during the direct reclaim without any good reason. Note that __alloc_contig_migrate_range can isolate a lot of pages at once. On mobile devices such as 512M ram android Phone, it may use a big zram swap. In some cases zram(zsmalloc) uses too many non-lru but migratedable pages, such as: MemTotal: 468148 kB Normal free:5620kB Free swap:4736kB Total swap:409596kB ZRAM: 164616kB(zsmalloc non-lru pages) active_anon:60700kB inactive_anon:60744kB active_file:34420kB inactive_file:37532kB Fix this by only accounting lru pages to NR_ISOLATED_* in isolate_migratepages_block right after they were isolated and we still know they were on LRU. Drop acct_isolated because it is called after the fact and we've lost that information. Batching per-cpu counter doesn't make much improvement anyway. Also make sure that we uncharge only LRU pages when putting them back on the LRU in putback_movable_pages resp. when unmap_and_move migrates the page. [mhocko@suse.com: replace acct_isolated() with direct counting] Fixes: bda807d44454 ("mm: migrate: support non-lru movable page migration") Link: http://lkml.kernel.org/r/20161019080240.9682-1-mhocko@kernel.org Signed-off-by: Ming Ling Signed-off-by: Michal Hocko Acked-by: Minchan Kim Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/compaction.c | 25 +++---------------------- mm/migrate.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 0409a4ad6ea1..70e6bec46dc2 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -634,22 +634,6 @@ isolate_freepages_range(struct compact_control *cc, return pfn; } -/* Update the number of anon and file isolated pages in the zone */ -static void acct_isolated(struct zone *zone, struct compact_control *cc) -{ - struct page *page; - unsigned int count[2] = { 0, }; - - if (list_empty(&cc->migratepages)) - return; - - list_for_each_entry(page, &cc->migratepages, lru) - count[!!page_is_file_cache(page)]++; - - mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]); - mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]); -} - /* Similar to reclaim, but different enough that they don't share logic */ static bool too_many_isolated(struct zone *zone) { @@ -866,6 +850,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* Successfully isolated */ del_page_from_lru_list(page, lruvec, page_lru(page)); + inc_node_page_state(page, + NR_ISOLATED_ANON + page_is_file_cache(page)); isolate_success: list_add(&page->lru, &cc->migratepages); @@ -902,7 +888,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, spin_unlock_irqrestore(zone_lru_lock(zone), flags); locked = false; } - acct_isolated(zone, cc); putback_movable_pages(&cc->migratepages); cc->nr_migratepages = 0; cc->last_migrated_pfn = 0; @@ -988,7 +973,6 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) break; } - acct_isolated(cc->zone, cc); return pfn; } @@ -1258,10 +1242,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, low_pfn = isolate_migratepages_block(cc, low_pfn, block_end_pfn, isolate_mode); - if (!low_pfn || cc->contended) { - acct_isolated(zone, cc); + if (!low_pfn || cc->contended) return ISOLATE_ABORT; - } /* * Either we isolated something and proceed with migration. Or @@ -1271,7 +1253,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, break; } - acct_isolated(zone, cc); /* Record where migration scanner will be restarted. */ cc->migrate_pfn = low_pfn; diff --git a/mm/migrate.c b/mm/migrate.c index 99250aee1ac1..66ce6b490b13 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -168,8 +168,6 @@ void putback_movable_pages(struct list_head *l) continue; } list_del(&page->lru); - dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); /* * We isolated non-lru movable page so here we can use * __PageMovable because LRU page's mapping cannot have @@ -186,6 +184,8 @@ void putback_movable_pages(struct list_head *l) put_page(page); } else { putback_lru_page(page); + dec_node_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); } } } @@ -1121,8 +1121,15 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, * restored. */ list_del(&page->lru); - dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + + /* + * Compaction can migrate also non-LRU pages which are + * not accounted to NR_ISOLATED_*. They can be recognized + * as __PageMovable + */ + if (likely(!__PageMovable(page))) + dec_node_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); } /* -- GitLab From a05aa258b237535df67926204b359b1dacc03ed1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Nov 2016 14:11:00 +0300 Subject: [PATCH 0348/1442] s390/crypto: unlock on error in prng_tdes_read() commit 9e6e7c74315095fd40f41003850690c711e44420 upstream. We added some new locking but forgot to unlock on error. Fixes: 57127645d79d ("s390/zcrypt: Introduce new SHA-512 based Pseudo Random Generator.") Signed-off-by: Dan Carpenter Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/crypto/prng.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 9cc050f9536c..1113389d0a39 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -507,8 +507,10 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf, prng_data->prngws.byte_counter += n; prng_data->prngws.reseed_counter += n; - if (copy_to_user(ubuf, prng_data->buf, chunk)) - return -EFAULT; + if (copy_to_user(ubuf, prng_data->buf, chunk)) { + ret = -EFAULT; + break; + } nbytes -= chunk; ret += chunk; -- GitLab From cdeaed7dda7bc0946d9fbf5ea75821c130f95a4b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Oct 2016 19:15:16 +0100 Subject: [PATCH 0349/1442] crypto: arm64/sha2-ce - fix for big endian commit 174122c39c369ed924d2608fc0be0171997ce800 upstream. The SHA256 digest is an array of 8 32-bit quantities, so we should refer to them as such in order for this code to work correctly when built for big endian. So replace 16 byte scalar loads and stores with 4x32 vector ones where appropriate. Fixes: 6ba6c74dfc6b ("arm64/crypto: SHA-224/SHA-256 using ARMv8 Crypto Extensions") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/sha2-ce-core.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 5df9d9d470ad..01cfee066837 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform) ld1 {v12.4s-v15.4s}, [x8] /* load state */ - ldp dga, dgb, [x0] + ld1 {dgav.4s, dgbv.4s}, [x0] /* load sha256_ce_state::finalize */ ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] @@ -148,6 +148,6 @@ CPU_LE( rev32 v19.16b, v19.16b ) b 1b /* store new state */ -3: stp dga, dgb, [x0] +3: st1 {dgav.4s, dgbv.4s}, [x0] ret ENDPROC(sha2_ce_transform) -- GitLab From a7c9666735f4953199bdebdbc04a85e34fff97a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Oct 2016 19:15:14 +0100 Subject: [PATCH 0350/1442] crypto: arm64/ghash-ce - fix for big endian commit 9c433ad5083fd4a4a3c721d86cbfbd0b2a2326a5 upstream. The GHASH key and digest are both pairs of 64-bit quantities, but the GHASH code does not always refer to them as such, causing failures when built for big endian. So replace the 16x1 loads and stores with 2x8 ones. Fixes: b913a6404ce2 ("arm64/crypto: improve performance of GHASH algorithm") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/ghash-ce-core.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index dc457015884e..f0bb9f0b524f 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -29,8 +29,8 @@ * struct ghash_key const *k, const char *head) */ ENTRY(pmull_ghash_update) - ld1 {SHASH.16b}, [x3] - ld1 {XL.16b}, [x1] + ld1 {SHASH.2d}, [x3] + ld1 {XL.2d}, [x1] movi MASK.16b, #0xe1 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 shl MASK.2d, MASK.2d, #57 @@ -74,6 +74,6 @@ CPU_LE( rev64 T1.16b, T1.16b ) cbnz w0, 0b - st1 {XL.16b}, [x1] + st1 {XL.2d}, [x1] ret ENDPROC(pmull_ghash_update) -- GitLab From e6ce55f7be903ec5cecfa0953fb4960d72b961a7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Oct 2016 19:15:20 +0100 Subject: [PATCH 0351/1442] crypto: arm/aes-ce - fix for big endian commit 58010fa6f71c9577922b22e46014b95a4ec80fa0 upstream. The AES key schedule generation is mostly endian agnostic, with the exception of the rotation and the incorporation of the round constant at the start of each round. So implement a big endian specific version of that part to make the whole routine big endian compatible. Fixes: 86464859cc77 ("crypto: arm - AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm/crypto/aes-ce-glue.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index aef022a87c53..04410d9f5e72 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -88,8 +88,13 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; +#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); rko[0] = rko[0] ^ rki[0] ^ rcon[i]; +#else + rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8); + rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24); +#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; -- GitLab From d018dc9540f729699f6f96802e67c1dac1a4769d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Oct 2016 19:15:17 +0100 Subject: [PATCH 0352/1442] crypto: arm64/aes-ccm-ce: fix for big endian commit 56e4e76c68fcb51547b5299e5b66a135935ff414 upstream. The AES-CCM implementation that uses ARMv8 Crypto Extensions instructions refers to the AES round keys as pairs of 64-bit quantities, which causes failures when building the code for big endian. In addition, it byte swaps the input counter unconditionally, while this is only required for little endian builds. So fix both issues. Fixes: 12ac3efe74f8 ("arm64/crypto: use crypto instructions to generate AES key schedule") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-ce-ccm-core.S | 53 +++++++++++++++-------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index a2a7fbcacc14..3363560c79b7 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -9,6 +9,7 @@ */ #include +#include .text .arch armv8-a+crypto @@ -19,7 +20,7 @@ */ ENTRY(ce_aes_ccm_auth_data) ldr w8, [x3] /* leftover from prev round? */ - ld1 {v0.2d}, [x0] /* load mac */ + ld1 {v0.16b}, [x0] /* load mac */ cbz w8, 1f sub w8, w8, #16 eor v1.16b, v1.16b, v1.16b @@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) beq 8f /* out of input? */ cbnz w8, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.2d}, [x4] /* load first round key */ +1: ld1 {v3.16b}, [x4] /* load first round key */ prfm pldl1strm, [x1] cmp w5, #12 /* which key size? */ add x6, x4, #16 @@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) mov v5.16b, v3.16b b 4f 2: mov v4.16b, v3.16b - ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ + ld1 {v5.16b}, [x6], #16 /* load 2nd round key */ 3: aese v0.16b, v4.16b aesmc v0.16b, v0.16b -4: ld1 {v3.2d}, [x6], #16 /* load next round key */ +4: ld1 {v3.16b}, [x6], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b -5: ld1 {v4.2d}, [x6], #16 /* load next round key */ +5: ld1 {v4.16b}, [x6], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b - ld1 {v5.2d}, [x6], #16 /* load next round key */ + ld1 {v5.16b}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b subs w2, w2, #16 /* last data? */ @@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data) ld1 {v1.16b}, [x1], #16 /* load next input block */ eor v0.16b, v0.16b, v1.16b /* xor with mac */ bne 1b -6: st1 {v0.2d}, [x0] /* store mac */ +6: st1 {v0.16b}, [x0] /* store mac */ beq 10f adds w2, w2, #16 beq 10f @@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data) adds w7, w7, #1 bne 9b eor v0.16b, v0.16b, v1.16b - st1 {v0.2d}, [x0] + st1 {v0.16b}, [x0] 10: str w8, [x3] ret ENDPROC(ce_aes_ccm_auth_data) @@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data) * u32 rounds); */ ENTRY(ce_aes_ccm_final) - ld1 {v3.2d}, [x2], #16 /* load first round key */ - ld1 {v0.2d}, [x0] /* load mac */ + ld1 {v3.16b}, [x2], #16 /* load first round key */ + ld1 {v0.16b}, [x0] /* load mac */ cmp w3, #12 /* which key size? */ sub w3, w3, #2 /* modified # of rounds */ - ld1 {v1.2d}, [x1] /* load 1st ctriv */ + ld1 {v1.16b}, [x1] /* load 1st ctriv */ bmi 0f bne 3f mov v5.16b, v3.16b b 2f 0: mov v4.16b, v3.16b -1: ld1 {v5.2d}, [x2], #16 /* load next round key */ +1: ld1 {v5.16b}, [x2], #16 /* load next round key */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -2: ld1 {v3.2d}, [x2], #16 /* load next round key */ +2: ld1 {v3.16b}, [x2], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -3: ld1 {v4.2d}, [x2], #16 /* load next round key */ +3: ld1 {v4.16b}, [x2], #16 /* load next round key */ subs w3, w3, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b @@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final) aese v1.16b, v4.16b /* final round key cancels out */ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ - st1 {v0.2d}, [x0] /* store result */ + st1 {v0.16b}, [x0] /* store result */ ret ENDPROC(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc ldr x8, [x6, #8] /* load lower ctr */ - ld1 {v0.2d}, [x5] /* load mac */ - rev x8, x8 /* keep swabbed ctr in reg */ + ld1 {v0.16b}, [x5] /* load mac */ +CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ 0: /* outer loop */ - ld1 {v1.1d}, [x6] /* load upper ctr */ + ld1 {v1.8b}, [x6] /* load upper ctr */ prfm pldl1strm, [x1] add x8, x8, #1 rev x9, x8 cmp w4, #12 /* which key size? */ sub w7, w4, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.2d}, [x3] /* load first round key */ + ld1 {v3.16b}, [x3] /* load first round key */ add x10, x3, #16 bmi 1f bne 4f mov v5.16b, v3.16b b 3f 1: mov v4.16b, v3.16b - ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ + ld1 {v5.16b}, [x10], #16 /* load 2nd round key */ 2: /* inner loop: 3 rounds, 2x interleaved */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -3: ld1 {v3.2d}, [x10], #16 /* load next round key */ +3: ld1 {v3.16b}, [x10], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -4: ld1 {v4.2d}, [x10], #16 /* load next round key */ +4: ld1 {v4.16b}, [x10], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b aese v1.16b, v3.16b aesmc v1.16b, v1.16b - ld1 {v5.2d}, [x10], #16 /* load next round key */ + ld1 {v5.16b}, [x10], #16 /* load next round key */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b @@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final) eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ st1 {v1.16b}, [x0], #16 /* write output block */ bne 0b - rev x8, x8 - st1 {v0.2d}, [x5] /* store mac */ +CPU_LE( rev x8, x8 ) + st1 {v0.16b}, [x5] /* store mac */ str x8, [x6, #8] /* store lsb end of ctr (BE) */ 5: ret 6: eor v0.16b, v0.16b, v5.16b /* final round mac */ eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.2d}, [x5] /* store mac */ + st1 {v0.16b}, [x5] /* store mac */ add w2, w2, #16 /* process partial tail block */ 7: ldrb w9, [x1], #1 /* get 1 byte of input */ umov w6, v1.b[0] /* get top crypted ctr byte */ -- GitLab From 39b7e1c2fdda93d632c38b457f865dcacc8fa01a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Oct 2016 19:15:18 +0100 Subject: [PATCH 0353/1442] crypto: arm64/aes-neon - fix for big endian commit a2c435cc99862fd3d165e1b66bf48ac72c839c62 upstream. The AES implementation using pure NEON instructions relies on the generic AES key schedule generation routines, which store the round keys as arrays of 32-bit quantities stored in memory using native endianness. This means we should refer to these round keys using 4x4 loads rather than 16x1 loads. In addition, the ShiftRows tables are loading using a single scalar load, which is also affected by endianness, so emit these tables in the correct order depending on whether we are building for big endian or not. Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-neon.S | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index b93170e1cc93..85f07ead7c5c 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -9,6 +9,7 @@ */ #include +#include #define AES_ENTRY(func) ENTRY(neon_ ## func) #define AES_ENDPROC(func) ENDPROC(neon_ ## func) @@ -83,13 +84,13 @@ .endm .macro do_block, enc, in, rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ sub_bytes \in - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -229,7 +230,7 @@ .endm .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ @@ -237,7 +238,7 @@ sub_bytes_2x \in0, \in1 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -254,7 +255,7 @@ .endm .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ @@ -266,7 +267,7 @@ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -306,12 +307,16 @@ .text .align 4 .LForward_ShiftRows: - .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 - .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb +CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 ) +CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb ) +CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 ) +CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 ) .LReverse_ShiftRows: - .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb - .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 +CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb ) +CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 ) +CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 ) +CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 ) .LForward_Sbox: .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -- GitLab From c3edfe038a75ff264fd97cf29f7773e3143df14f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Oct 2016 19:15:15 +0100 Subject: [PATCH 0354/1442] crypto: arm64/sha1-ce - fix for big endian commit ee71e5f1e7d25543ee63a80451871f8985b8d431 upstream. The SHA1 digest is an array of 5 32-bit quantities, so we should refer to them as such in order for this code to work correctly when built for big endian. So replace 16 byte scalar loads and stores with 4x4 vector ones where appropriate. Fixes: 2c98833a42cd ("arm64/crypto: SHA-1 using ARMv8 Crypto Extensions") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/sha1-ce-core.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 033aae6d732a..c98e7e849f06 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform) ld1r {k3.4s}, [x6] /* load state */ - ldr dga, [x0] + ld1 {dgav.4s}, [x0] ldr dgb, [x0, #16] /* load sha1_ce_state::finalize */ @@ -144,7 +144,7 @@ CPU_LE( rev32 v11.16b, v11.16b ) b 1b /* store new state */ -3: str dga, [x0] +3: st1 {dgav.4s}, [x0] str dgb, [x0, #16] ret ENDPROC(sha1_ce_transform) -- GitLab From eb64cbc5665d6756cebe85977e493fe0507f0bdb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Oct 2016 19:15:19 +0100 Subject: [PATCH 0355/1442] crypto: arm64/aes-xts-ce: fix for big endian commit caf4b9e2b326cc2a5005a5c557274306536ace61 upstream. Emit the XTS tweak literal constants in the appropriate order for a single 128-bit scalar literal load. Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-ce.S | 1 + arch/arm64/crypto/aes-modes.S | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 78f3cfe92c08..b46093d567e5 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -10,6 +10,7 @@ */ #include +#include #define AES_ENTRY(func) ENTRY(ce_ ## func) #define AES_ENDPROC(func) ENDPROC(ce_ ## func) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index f6e372c528eb..c53dbeae79f2 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -386,7 +386,8 @@ AES_ENDPROC(aes_ctr_encrypt) .endm .Lxts_mul_x: - .word 1, 0, 0x87, 0 +CPU_LE( .quad 1, 0x87 ) +CPU_BE( .quad 0x87, 1 ) AES_ENTRY(aes_xts_encrypt) FRAME_PUSH -- GitLab From d33a490770e5b5bde66b2773b6e0162039dae11a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 11 Oct 2016 19:15:13 +0100 Subject: [PATCH 0356/1442] crypto: arm64/aes-ce - fix for big endian commit 1803b9a52c4e5a5dbb8a27126f6bc06939359753 upstream. The core AES cipher implementation that uses ARMv8 Crypto Extensions instructions erroneously loads the round keys as 64-bit quantities, which causes the algorithm to fail when built for big endian. In addition, the key schedule generation routine fails to take endianness into account as well, when loading the combining the input key with the round constants. So fix both issues. Fixes: 12ac3efe74f8 ("arm64/crypto: use crypto instructions to generate AES key schedule") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-ce-cipher.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index f7bd9bf0bbb3..50d9fe11d0c8 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.2d}, [%[key]], #16 ;" + " ld1 {v1.16b}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" "1: aese v0.16b, v2.16b ;" " aesmc v0.16b, v0.16b ;" - "2: ld1 {v1.2d}, [%[key]], #16 ;" + "2: ld1 {v1.16b}, [%[key]], #16 ;" " aese v0.16b, v3.16b ;" " aesmc v0.16b, v0.16b ;" - "3: ld1 {v2.2d}, [%[key]], #16 ;" + "3: ld1 {v2.16b}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aese v0.16b, v1.16b ;" " aesmc v0.16b, v0.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" " bpl 1b ;" " aese v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.2d}, [%[key]], #16 ;" + " ld1 {v1.16b}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" "1: aesd v0.16b, v2.16b ;" " aesimc v0.16b, v0.16b ;" - "2: ld1 {v1.2d}, [%[key]], #16 ;" + "2: ld1 {v1.16b}, [%[key]], #16 ;" " aesd v0.16b, v3.16b ;" " aesimc v0.16b, v0.16b ;" - "3: ld1 {v2.2d}, [%[key]], #16 ;" + "3: ld1 {v2.16b}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aesd v0.16b, v1.16b ;" " aesimc v0.16b, v0.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" " bpl 1b ;" " aesd v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; +#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; +#else + rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^ + rki[0]; +#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; -- GitLab From 60a931c20d1a401b79ff742142225c300d545af0 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 8 Dec 2016 15:48:18 -0800 Subject: [PATCH 0357/1442] md: MD_RECOVERY_NEEDED is set for mddev->recovery commit 82a301cb0ea2df8a5c88213094a01660067c7fb4 upstream. Fixes: 90f5f7ad4f38("md: Wait for md_check_recovery before attempting device removal.") Reviewed-by: NeilBrown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2089d46b0eb8..1a9131b6594c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6829,7 +6829,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, /* need to ensure recovery thread has run */ wait_event_interruptible_timeout(mddev->sb_wait, !test_bit(MD_RECOVERY_NEEDED, - &mddev->flags), + &mddev->recovery), msecs_to_jiffies(5000)); if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) { /* Need to flush page cache, and ensure no-one else opens -- GitLab From 5cc85ef4ffe6c3f1d35b75975c5731248c5af4a7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Dec 2016 16:40:50 +1100 Subject: [PATCH 0358/1442] md: fix refcount problem on mddev when stopping array. commit e2342ca832726a840ca6bd196dd2cc073815b08a upstream. md_open() gets a counted reference on an mddev using mddev_find(). If it ends up returning an error, it must drop this reference. There are two error paths where the reference is not dropped. One only happens if the process is signalled and an awkward time, which is quite unlikely. The other was introduced recently in commit af8d8e6f0. Change the code to ensure the drop the reference when returning an error, and make it harded to re-introduce this sort of bug in the future. Reported-by: Marc Smith Fixes: af8d8e6f0315 ("md: changes for MD_STILL_CLOSED flag") Signed-off-by: NeilBrown Acked-by: Guoqing Jiang Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1a9131b6594c..24925f2aa235 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7092,7 +7092,8 @@ static int md_open(struct block_device *bdev, fmode_t mode) if (test_bit(MD_CLOSING, &mddev->flags)) { mutex_unlock(&mddev->open_mutex); - return -ENODEV; + err = -ENODEV; + goto out; } err = 0; @@ -7101,6 +7102,8 @@ static int md_open(struct block_device *bdev, fmode_t mode) check_disk_change(bdev); out: + if (err) + mddev_put(mddev); return err; } -- GitLab From 725ba1a3ebc49478f76dea369563a79c86546fbd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 20 Oct 2016 19:09:57 -0700 Subject: [PATCH 0359/1442] f2fs: remove percpu_count due to performance regression commit 35782b233f37e48ecc469d9c7232f3f6a7fad41a upstream. This patch removes percpu_count usage due to performance regression in iozone. Fixes: 523be8a6b3 ("f2fs: use percpu_counter for page counters") Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/debug.c | 12 ++++++------ fs/f2fs/f2fs.h | 12 ++++++------ fs/f2fs/super.c | 16 +++++----------- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 1c35e80732e0..687998e9557c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -310,17 +310,17 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n", + seq_printf(s, " - inmem: %4d, wb_bios: %4d\n", si->inmem_pages, si->wb_bios); - seq_printf(s, " - nodes: %4lld in %4d\n", + seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); - seq_printf(s, " - dents: %4lld in dirs:%4d (%4d)\n", + seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); - seq_printf(s, " - datas: %4lld in files:%4d\n", + seq_printf(s, " - datas: %4d in files:%4d\n", si->ndirty_data, si->ndirty_files); - seq_printf(s, " - meta: %4lld in %4d\n", + seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); - seq_printf(s, " - imeta: %4lld\n", + seq_printf(s, " - imeta: %4d\n", si->ndirty_imeta); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6dd03115789b..506af456412f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -819,7 +819,7 @@ struct f2fs_sb_info { atomic_t nr_wb_bios; /* # of writeback bios */ /* # of pages, see count_type */ - struct percpu_counter nr_pages[NR_COUNT_TYPE]; + atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; @@ -1233,7 +1233,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { - percpu_counter_inc(&sbi->nr_pages[count_type]); + atomic_inc(&sbi->nr_pages[count_type]); if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES) return; @@ -1250,7 +1250,7 @@ static inline void inode_inc_dirty_pages(struct inode *inode) static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) { - percpu_counter_dec(&sbi->nr_pages[count_type]); + atomic_dec(&sbi->nr_pages[count_type]); } static inline void inode_dec_dirty_pages(struct inode *inode) @@ -1266,7 +1266,7 @@ static inline void inode_dec_dirty_pages(struct inode *inode) static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) { - return percpu_counter_sum_positive(&sbi->nr_pages[count_type]); + return atomic_read(&sbi->nr_pages[count_type]); } static inline int get_dirty_pages(struct inode *inode) @@ -2187,8 +2187,8 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; - s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; - s64 inmem_pages; + int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; + int inmem_pages; unsigned int ndirty_dirs, ndirty_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8021d35df7b0..013c6a541d6b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -688,10 +688,6 @@ static void f2fs_destroy_inode(struct inode *inode) static void destroy_percpu_info(struct f2fs_sb_info *sbi) { - int i; - - for (i = 0; i < NR_COUNT_TYPE; i++) - percpu_counter_destroy(&sbi->nr_pages[i]); percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); } @@ -1442,6 +1438,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; + int i; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -1466,6 +1463,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); + for (i = 0; i < NR_COUNT_TYPE; i++) + atomic_set(&sbi->nr_pages[i], 0); + INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); mutex_init(&sbi->wio_mutex[NODE]); @@ -1481,13 +1481,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) static int init_percpu_info(struct f2fs_sb_info *sbi) { - int i, err; - - for (i = 0; i < NR_COUNT_TYPE; i++) { - err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); - if (err) - return err; - } + int err; err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL); if (err) -- GitLab From a299abd230818ceb4e68520ba2f442194267cc50 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 2 Nov 2016 14:52:15 +0100 Subject: [PATCH 0360/1442] f2fs: hide a maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 230436b3ef3fd7d4a1da19edf5e87bb2d74e0fc2 upstream. gcc is unsure about the use of last_ofs_in_node, which might happen without a prior initialization: fs/f2fs//git/arm-soc/fs/f2fs/data.c: In function ‘f2fs_map_blocks’: fs/f2fs/data.c:799:54: warning: ‘last_ofs_in_node’ may be used uninitialized in this function [-Wmaybe-uninitialized] if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { As pointed out by Chao Yu, the code is actually correct as 'prealloc' is only set if the last_ofs_in_node has been set, the two always get updated together. This initializes last_ofs_in_node to dn.ofs_in_node for each new dnode at the start of the 'next_block' loop, which at that point is a correct initialization as well. I assume that compilers that correctly track the contents of the variables and do not warn about the condition also figure out that they can eliminate the extra assignment here. Fixes: 46008c6d4232 ("f2fs: support in batch multi blocks preallocation") Signed-off-by: Arnd Bergmann Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9ae194fd2fdb..14db4b712021 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -716,7 +716,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } prealloc = 0; - ofs_in_node = dn.ofs_in_node; + last_ofs_in_node = ofs_in_node = dn.ofs_in_node; end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: -- GitLab From 257349bedca317b10b25d489082bd32a38480aae Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Nov 2016 09:30:24 -0200 Subject: [PATCH 0361/1442] staging: media: davinci_vpfe: unlock on error in vpfe_reqbufs() commit c4a407b91f4b644145492e28723f9f880efb1da0 upstream. We should unlock before returning this error code in vpfe_reqbufs(). Fixes: 622897da67b3 ("[media] davinci: vpfe: add v4l2 video driver support") Signed-off-by: Dan Carpenter Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/davinci_vpfe/vpfe_video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/davinci_vpfe/vpfe_video.c b/drivers/staging/media/davinci_vpfe/vpfe_video.c index 8be9f854510f..89dd6b989254 100644 --- a/drivers/staging/media/davinci_vpfe/vpfe_video.c +++ b/drivers/staging/media/davinci_vpfe/vpfe_video.c @@ -1362,7 +1362,7 @@ static int vpfe_reqbufs(struct file *file, void *priv, ret = vb2_queue_init(q); if (ret) { v4l2_err(&vpfe_dev->v4l2_dev, "vb2_queue_init() failed\n"); - return ret; + goto unlock_out; } fh->io_allowed = 1; -- GitLab From 6e4bcf8539afce37430c4e56b1520535f0f5ea7a Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 7 Dec 2016 15:05:59 -0600 Subject: [PATCH 0362/1442] PCI: rockchip: Fix negotiated lanes calculation commit 45e9320f3a4ef9588ee50a2eb1891c4bfdbb07df upstream. The calculation of negotiated lanes is wrong: it should be shifted by PCIE_CORE_PL_CONF_LANE_SHIFT, but it is shifted by PCIE_CORE_PL_CONF_LANE_MASK instead. Let's fix it. Fixes: e77f847df54c ("PCI: rockchip: Add Rockchip PCIe controller support") Signed-off-by: Shawn Lin Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-rockchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c index e04f69beb42d..8e390ec498a8 100644 --- a/drivers/pci/host/pcie-rockchip.c +++ b/drivers/pci/host/pcie-rockchip.c @@ -590,8 +590,8 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) /* Check the final link width from negotiated lane counter from MGMT */ status = rockchip_pcie_read(rockchip, PCIE_CORE_CTRL); - status = 0x1 << ((status & PCIE_CORE_PL_CONF_LANE_MASK) >> - PCIE_CORE_PL_CONF_LANE_MASK); + status = 0x1 << ((status & PCIE_CORE_PL_CONF_LANE_MASK) >> + PCIE_CORE_PL_CONF_LANE_SHIFT); dev_dbg(dev, "current link width is x%d\n", status); rockchip_pcie_write(rockchip, ROCKCHIP_VENDOR_ID, -- GitLab From 090cce6f6f88ecf34cee068080800a27b70e993c Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 7 Dec 2016 15:06:00 -0600 Subject: [PATCH 0363/1442] PCI: rockchip: Correct the use of FTS mask commit a45e2611b9bbd81288d97d02ce7e74a60a698d43 upstream. We're trying to mask out bits[23:8] while retaining [32:24, 7:0], but we're doing the inverse. That doesn't have too much effect, since we're setting all the [23:8] bits to 1, and the other bits are only relevant for modes we're currently not using. But we should get this right. Fixes: ca1989084054 ("PCI: rockchip: Fix wrong transmitted FTS count") Signed-off-by: Brian Norris Signed-off-by: Bjorn Helgaas Acked-by: Shawn Lin Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c index 8e390ec498a8..3452983d3569 100644 --- a/drivers/pci/host/pcie-rockchip.c +++ b/drivers/pci/host/pcie-rockchip.c @@ -533,7 +533,7 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) /* Fix the transmitted FTS count desired to exit from L0s. */ status = rockchip_pcie_read(rockchip, PCIE_CORE_CTRL_PLC1); - status = (status & PCIE_CORE_CTRL_PLC1_FTS_MASK) | + status = (status & ~PCIE_CORE_CTRL_PLC1_FTS_MASK) | (PCIE_CORE_CTRL_PLC1_FTS_CNT << PCIE_CORE_CTRL_PLC1_FTS_SHIFT); rockchip_pcie_write(rockchip, status, PCIE_CORE_CTRL_PLC1); -- GitLab From 04b97a6be2eddd984ea176bc470d87248d9213d9 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 17 Nov 2016 16:06:56 -0600 Subject: [PATCH 0364/1442] PCI: Add Mellanox device IDs commit 7254383341bc6e1a61996accd836009f0c922b21 upstream. Add Mellanox device IDs for use by the mlx4 driver and INTx quirks. [bhelgaas: sorted and adapted from http://lkml.kernel.org/r/1478011644-12080-1-git-send-email-noaos@mellanox.com] Signed-off-by: Noa Osherovich Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c58752fe16c4..f020ab4079d3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2256,12 +2256,29 @@ #define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 #define PCI_VENDOR_ID_MELLANOX 0x15b3 -#define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007 +#define PCI_DEVICE_ID_MELLANOX_CONNECTIB 0x1011 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4 0x1013 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX 0x1015 +#define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 #define PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE 0x5a46 -#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 -#define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 -#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c -#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c +#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 +#define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340 +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354 +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372 +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732 +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746 +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764 +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e #define PCI_VENDOR_ID_DFI 0x15bd -- GitLab From 890661544739d929cf4c5d9bce460f66ff18cb6c Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 15 Nov 2016 09:59:58 +0200 Subject: [PATCH 0365/1442] PCI: Convert broken INTx masking quirks from HEADER to FINAL commit b88214ce4d7064992452765028bd50702414f15f upstream. Convert all quirk_broken_intx_masking() quirks from HEADER to FINAL. The quirk sets dev->broken_intx_masking, which is only used by pci_intx_mask_supported(), which is not needed until after FINAL quirks have been run. [bhelgaas: changelog] Signed-off-by: Noa Osherovich Signed-off-by: Bjorn Helgaas Reviewed-by: Gavin Shan Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c232729f5b1b..85048fdf2474 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3146,53 +3146,53 @@ static void quirk_broken_intx_masking(struct pci_dev *dev) { dev->broken_intx_masking = 1; } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CHELSIO, 0x0030, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */ - quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x0030, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */ + quirk_broken_intx_masking); /* * Realtek RTL8169 PCI Gigabit Ethernet Controller (rev 10) * Subsystem: Realtek RTL8169/8110 Family PCI Gigabit Ethernet NIC * * RTL8110SC - Fails under PCI device assignment using DisINTx masking. */ -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID, - quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REALTEK, 0x8169, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID, + quirk_broken_intx_masking); /* * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking, * DisINTx can be set but the interrupt status bit is non-functional. */ -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1572, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1574, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1580, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1581, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1583, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1584, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1585, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1586, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1587, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1588, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1589, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d0, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d1, - quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x37d2, - quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1572, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1574, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1580, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1581, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1583, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1584, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1585, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1586, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1587, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1588, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1589, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d0, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d1, + quirk_broken_intx_masking); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d2, + quirk_broken_intx_masking); static void quirk_no_bus_reset(struct pci_dev *dev) { -- GitLab From e2822904ace59b89c054123534b6da6bb82c72c6 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 15 Nov 2016 09:59:59 +0200 Subject: [PATCH 0366/1442] PCI: Convert Mellanox broken INTx quirks to be for listed devices only commit d76d2fe05fd93673d184af77255bbbc63780f4ea upstream. Change Mellanox's broken_intx_masking() quirk from an "all Mellanox devices" to a quirk for listed devices only. [bhelgaas: remove #defines, reorder to keep other quirks together] Signed-off-by: Noa Osherovich Signed-off-by: Bjorn Helgaas Reviewed-by: Or Gerlitz Reviewed-by: Gavin Shan Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 85048fdf2474..f0515b78a4a6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3137,8 +3137,9 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b5, quirk_remove_d3_delay); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x22b7, quirk_remove_d3_delay); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2298, quirk_remove_d3_delay); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x229c, quirk_remove_d3_delay); + /* - * Some devices may pass our check in pci_intx_mask_supported if + * Some devices may pass our check in pci_intx_mask_supported() if * PCI_COMMAND_INTX_DISABLE works though they actually do not properly * support this feature. */ @@ -3150,6 +3151,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x0030, quirk_broken_intx_masking); DECLARE_PCI_FIXUP_FINAL(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */ quirk_broken_intx_masking); + /* * Realtek RTL8169 PCI Gigabit Ethernet Controller (rev 10) * Subsystem: Realtek RTL8169/8110 Family PCI Gigabit Ethernet NIC @@ -3158,8 +3160,6 @@ DECLARE_PCI_FIXUP_FINAL(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */ */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REALTEK, 0x8169, quirk_broken_intx_masking); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID, - quirk_broken_intx_masking); /* * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking, @@ -3194,6 +3194,40 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d1, DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x37d2, quirk_broken_intx_masking); +static u16 mellanox_broken_intx_devs[] = { + PCI_DEVICE_ID_MELLANOX_HERMON_SDR, + PCI_DEVICE_ID_MELLANOX_HERMON_DDR, + PCI_DEVICE_ID_MELLANOX_HERMON_QDR, + PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2, + PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2, + PCI_DEVICE_ID_MELLANOX_HERMON_EN, + PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2, + PCI_DEVICE_ID_MELLANOX_CONNECTX_EN, + PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2, + PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2, + PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2, + PCI_DEVICE_ID_MELLANOX_CONNECTX2, + PCI_DEVICE_ID_MELLANOX_CONNECTX3, + PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO, + PCI_DEVICE_ID_MELLANOX_CONNECTIB, + PCI_DEVICE_ID_MELLANOX_CONNECTX4, + PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX, +}; + +static void mellanox_check_broken_intx_masking(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mellanox_broken_intx_devs); i++) { + if (pdev->device == mellanox_broken_intx_devs[i]) { + pdev->broken_intx_masking = 1; + return; + } + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID, + mellanox_check_broken_intx_masking); + static void quirk_no_bus_reset(struct pci_dev *dev) { dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; -- GitLab From c444cc34534b779d2f2b0edda17444c7a6aab9b5 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 15 Nov 2016 10:00:00 +0200 Subject: [PATCH 0367/1442] PCI: Support INTx masking on ConnectX-4 with firmware x.14.1100+ commit 1600f62534b7b3da7978b43b52231a54c24df287 upstream. Mellanox devices were marked as having INTx masking ability broken. As a result, the VFIO driver fails to start when more than one device function is passed-through to a VM if both have the same INTx pin. Prior to Connect-IB, Mellanox devices exposed to the operating system one PCI function per all ports. Starting from Connect-IB, the devices are function-per-port. When passing the second function to a VM, VFIO will fail to start. Exclude ConnectX-4, ConnectX4-Lx and Connect-IB from the list of Mellanox devices marked as having broken INTx masking: - ConnectX-4 and ConnectX4-LX firmware version is checked. If INTx masking is supported, we unmark the broken INTx masking. - Connect-IB does not support INTx currently so will not cause any problem. [bhelgaas: call pci_disable_device() always, after iounmap()] Fixes: 11e42532ada3 ("PCI: Assume all Mellanox devices have broken INTx masking") Signed-off-by: Noa Osherovich Signed-off-by: Bjorn Helgaas Reviewed-by: Or Gerlitz Reviewed-by: Gavin Shan Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 59 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f0515b78a4a6..93b03128bec0 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3209,13 +3209,25 @@ static u16 mellanox_broken_intx_devs[] = { PCI_DEVICE_ID_MELLANOX_CONNECTX2, PCI_DEVICE_ID_MELLANOX_CONNECTX3, PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO, - PCI_DEVICE_ID_MELLANOX_CONNECTIB, - PCI_DEVICE_ID_MELLANOX_CONNECTX4, - PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX, }; +#define CONNECTX_4_CURR_MAX_MINOR 99 +#define CONNECTX_4_INTX_SUPPORT_MINOR 14 + +/* + * Check ConnectX-4/LX FW version to see if it supports legacy interrupts. + * If so, don't mark it as broken. + * FW minor > 99 means older FW version format and no INTx masking support. + * FW minor < 14 means new FW version format and no INTx masking support. + */ static void mellanox_check_broken_intx_masking(struct pci_dev *pdev) { + __be32 __iomem *fw_ver; + u16 fw_major; + u16 fw_minor; + u16 fw_subminor; + u32 fw_maj_min; + u32 fw_sub_min; int i; for (i = 0; i < ARRAY_SIZE(mellanox_broken_intx_devs); i++) { @@ -3224,6 +3236,47 @@ static void mellanox_check_broken_intx_masking(struct pci_dev *pdev) return; } } + + /* Getting here means Connect-IB cards and up. Connect-IB has no INTx + * support so shouldn't be checked further + */ + if (pdev->device == PCI_DEVICE_ID_MELLANOX_CONNECTIB) + return; + + if (pdev->device != PCI_DEVICE_ID_MELLANOX_CONNECTX4 && + pdev->device != PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) + return; + + /* For ConnectX-4 and ConnectX-4LX, need to check FW support */ + if (pci_enable_device_mem(pdev)) { + dev_warn(&pdev->dev, "Can't enable device memory\n"); + return; + } + + fw_ver = ioremap(pci_resource_start(pdev, 0), 4); + if (!fw_ver) { + dev_warn(&pdev->dev, "Can't map ConnectX-4 initialization segment\n"); + goto out; + } + + /* Reading from resource space should be 32b aligned */ + fw_maj_min = ioread32be(fw_ver); + fw_sub_min = ioread32be(fw_ver + 1); + fw_major = fw_maj_min & 0xffff; + fw_minor = fw_maj_min >> 16; + fw_subminor = fw_sub_min & 0xffff; + if (fw_minor > CONNECTX_4_CURR_MAX_MINOR || + fw_minor < CONNECTX_4_INTX_SUPPORT_MINOR) { + dev_warn(&pdev->dev, "ConnectX-4: FW %u.%u.%u doesn't support INTx masking, disabling. Please upgrade FW to %d.14.1100 and up for INTx support\n", + fw_major, fw_minor, fw_subminor, pdev->device == + PCI_DEVICE_ID_MELLANOX_CONNECTX4 ? 12 : 14); + pdev->broken_intx_masking = 1; + } + + iounmap(fw_ver); + +out: + pci_disable_device(pdev); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID, mellanox_check_broken_intx_masking); -- GitLab From 1aaa777ec0095f66d9717d07c224b374fbb5f08a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 24 Oct 2016 18:04:17 +1100 Subject: [PATCH 0368/1442] PCI: Enable access to non-standard VPD for Chelsio devices (cxgb3) commit 1c7de2b4ff886a45fbd2f4c3d4627e0f37a9dd77 upstream. There is at least one Chelsio 10Gb card which uses VPD area to store some non-standard blocks (example below). However pci_vpd_size() returns the length of the first block only assuming that there can be only one VPD "End Tag". Since 4e1a635552d3 ("vfio/pci: Use kernel VPD access functions"), VFIO blocks access beyond that offset, which prevents the guest "cxgb3" driver from probing the device. The host system does not have this problem as its driver accesses the config space directly without pci_read_vpd(). Add a quirk to override the VPD size to a bigger value. The maximum size is taken from EEPROMSIZE in drivers/net/ethernet/chelsio/cxgb3/common.h. We do not read the tag as the cxgb3 driver does as the driver supports writing to EEPROM/VPD and when it writes, it only checks for 8192 bytes boundary. The quirk is registered for all devices supported by the cxgb3 driver. This adds a quirk to the PCI layer (not to the cxgb3 driver) as the cxgb3 driver itself accesses VPD directly and the problem only exists with the vfio-pci driver (when cxgb3 is not running on the host and may not be even loaded) which blocks accesses beyond the first block of VPD data. However vfio-pci itself does not have quirks mechanism so we add it to PCI. This is the controller: Ethernet controller [0200]: Chelsio Communications Inc T310 10GbE Single Port Adapter [1425:0030] This is what I parsed from its VPD: === b'\x82*\x0010 Gigabit Ethernet-SR PCI Express Adapter\x90J\x00EC\x07D76809 FN\x0746K' 0000 Large item 42 bytes; name 0x2 Identifier String b'10 Gigabit Ethernet-SR PCI Express Adapter' 002d Large item 74 bytes; name 0x10 #00 [EC] len=7: b'D76809 ' #0a [FN] len=7: b'46K7897' #14 [PN] len=7: b'46K7897' #1e [MN] len=4: b'1037' #25 [FC] len=4: b'5769' #2c [SN] len=12: b'YL102035603V' #3b [NA] len=12: b'00145E992ED1' 007a Small item 1 bytes; name 0xf End Tag 0c00 Large item 16 bytes; name 0x2 Identifier String b'S310E-SR-X ' 0c13 Large item 234 bytes; name 0x10 #00 [PN] len=16: b'TBD ' #13 [EC] len=16: b'110107730D2 ' #26 [SN] len=16: b'97YL102035603V ' #39 [NA] len=12: b'00145E992ED1' #48 [V0] len=6: b'175000' #51 [V1] len=6: b'266666' #5a [V2] len=6: b'266666' #63 [V3] len=6: b'2000 ' #6c [V4] len=2: b'1 ' #71 [V5] len=6: b'c2 ' #7a [V6] len=6: b'0 ' #83 [V7] len=2: b'1 ' #88 [V8] len=2: b'0 ' #8d [V9] len=2: b'0 ' #92 [VA] len=2: b'0 ' #97 [RV] len=80: b's\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'... 0d00 Large item 252 bytes; name 0x11 #00 [VC] len=16: b'122310_1222 dp ' #13 [VD] len=16: b'610-0001-00 H1\x00\x00' #26 [VE] len=16: b'122310_1353 fp ' #39 [VF] len=16: b'610-0001-00 H1\x00\x00' #4c [RW] len=173: b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'... 0dff Small item 0 bytes; name 0xf End Tag 10f3 Large item 13315 bytes; name 0x62 !!! unknown item name 98: b'\xd0\x03\x00@`\x0c\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00' === Signed-off-by: Alexey Kardashevskiy Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 93b03128bec0..3a035e073889 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3342,6 +3342,25 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE, quirk_thunderbolt_hotplug_msi); +static void quirk_chelsio_extend_vpd(struct pci_dev *dev) +{ + pci_set_vpd_size(dev, 8192); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x20, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x21, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x22, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x23, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x24, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x25, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x26, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x30, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x31, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x32, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x35, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x36, quirk_chelsio_extend_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x37, quirk_chelsio_extend_vpd); + #ifdef CONFIG_ACPI /* * Apple: Shutdown Cactus Ridge Thunderbolt controller. -- GitLab From b3539f813578419c6b386a669405284df5c5950d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 16:26:03 +0100 Subject: [PATCH 0369/1442] powerpc/pci/rpadlpar: Fix device reference leaks commit 99e5cde5eae78bef95bfe7c16ccda87fb070149b upstream. Make sure to drop any device reference taken by vio_find_node() when adding and removing virtual I/O slots. Fixes: 5eeb8c63a38f ("[PATCH] PCI Hotplug: rpaphp: Move VIO registration") Signed-off-by: Johan Hovold Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/rpadlpar_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index dc67f39779ec..c614ff7c3bc3 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -257,8 +257,13 @@ static int dlpar_add_phb(char *drc_name, struct device_node *dn) static int dlpar_add_vio_slot(char *drc_name, struct device_node *dn) { - if (vio_find_node(dn)) + struct vio_dev *vio_dev; + + vio_dev = vio_find_node(dn); + if (vio_dev) { + put_device(&vio_dev->dev); return -EINVAL; + } if (!vio_register_device_node(dn)) { printk(KERN_ERR @@ -334,6 +339,9 @@ static int dlpar_remove_vio_slot(char *drc_name, struct device_node *dn) return -EINVAL; vio_unregister_device(vio_dev); + + put_device(&vio_dev->dev); + return 0; } -- GitLab From dae9151a88f775f88dd43595b8288f9aac13eedb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Dec 2016 09:50:16 +0100 Subject: [PATCH 0370/1442] s390/topology: always use s390 specific sched_domain_topology_level commit ebb299a51059017ec253bd30781a83d1f6e11b24 upstream. The s390 specific sched_domain_topology_level should always be used, not only if the machine provides topology information. Luckily this odd behaviour, that was by accident introduced with git commit d05d15da18f5 ("s390/topology: delay initialization of topology cpu masks") has currently no side effect. Fixes: d05d15da18f5 ("s390/topology: delay initialization of topology cpumasks") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index e959c02e0cac..8705ee66c087 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -448,6 +448,7 @@ static int __init s390_topology_init(void) struct sysinfo_15_1_x *info; int i; + set_sched_topology(s390_topology); if (!MACHINE_HAS_TOPOLOGY) return 0; tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL); @@ -460,7 +461,6 @@ static int __init s390_topology_init(void) alloc_masks(info, &socket_info, 1); alloc_masks(info, &book_info, 2); alloc_masks(info, &drawer_info, 3); - set_sched_topology(s390_topology); return 0; } early_initcall(s390_topology_init); -- GitLab From 5417f59cb9962a847a3148ff3fa959b7f7502559 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 7 Nov 2016 15:06:03 +0100 Subject: [PATCH 0371/1442] s390/pci: fix dma address calculation in map_sg commit 6b7df3ce92ac82ec3f4a2953b6fed77da7b38aaa upstream. __s390_dma_map_sg maps a dma-contiguous area. Although we only map whole pages we have to take into account that the area doesn't start or stop at a page boundary because we use the dma address to loop over the individual sg entries. Failing to do that might lead to an access of the wrong sg entry. Fixes: ee877b81c6b9 ("s390/pci_dma: improve map_sg") Reported-and-tested-by: Christoph Raisch Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci_dma.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 6b2f72f523b9..049e3860ac54 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -419,6 +419,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, size_t size, dma_addr_t *handle, enum dma_data_direction dir) { + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); dma_addr_t dma_addr_base, dma_addr; int flags = ZPCI_PTE_VALID; @@ -426,8 +427,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, unsigned long pa = 0; int ret; - size = PAGE_ALIGN(size); - dma_addr_base = dma_alloc_address(dev, size >> PAGE_SHIFT); + dma_addr_base = dma_alloc_address(dev, nr_pages); if (dma_addr_base == DMA_ERROR_CODE) return -ENOMEM; @@ -436,26 +436,27 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, flags |= ZPCI_TABLE_PROTECTED; for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { - pa = page_to_phys(sg_page(s)) + s->offset; - ret = __dma_update_trans(zdev, pa, dma_addr, s->length, flags); + pa = page_to_phys(sg_page(s)); + ret = __dma_update_trans(zdev, pa, dma_addr, + s->offset + s->length, flags); if (ret) goto unmap; - dma_addr += s->length; + dma_addr += s->offset + s->length; } ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); if (ret) goto unmap; *handle = dma_addr_base; - atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages); + atomic64_add(nr_pages, &zdev->mapped_pages); return ret; unmap: dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, ZPCI_PTE_INVALID); - dma_free_address(dev, dma_addr_base, size >> PAGE_SHIFT); + dma_free_address(dev, dma_addr_base, nr_pages); zpci_err("map error:\n"); zpci_err_dma(ret, pa); return ret; -- GitLab From 9ab30a6529b5d853f45b77593caada1a44156ed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 27 Oct 2016 15:37:44 +0900 Subject: [PATCH 0372/1442] drm/radeon: Always store CRTC relative radeon_crtc->cursor_x/y values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4349bd775cc8fd75cb648e3a2036a690f497de5c upstream. We were storing viewport relative coordinates for AVIVO/DCE display engines. However, radeon_crtc_cursor_set2 and radeon_cursor_reset pass radeon_crtc->cursor_x/y as the x/y parameters of radeon_cursor_move_locked, which would break if the CRTC isn't located at (0, 0). Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_cursor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 87a72476d313..fb16070b266e 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -146,6 +146,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) int xorigin = 0, yorigin = 0; int w = radeon_crtc->cursor_width; + radeon_crtc->cursor_x = x; + radeon_crtc->cursor_y = y; + if (ASIC_IS_AVIVO(rdev)) { /* avivo cursor are offset into the total surface */ x += crtc->x; @@ -240,9 +243,6 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) yorigin * 256); } - radeon_crtc->cursor_x = x; - radeon_crtc->cursor_y = y; - if (radeon_crtc->cursor_out_of_bounds) { radeon_crtc->cursor_out_of_bounds = false; if (radeon_crtc->cursor_bo) -- GitLab From 2bdb638de2fc9c2c1aa15ecdf28e295786a02ea1 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 13 Dec 2016 18:57:44 -0200 Subject: [PATCH 0373/1442] drm/i915: disable PSR by default on HSW/BDW commit 1c4672ce4eeaeaadeea8adabaad21262b7172607 upstream. We've been ignoring the poor bugzilla reporters that say PSR causes system lockups and all other sorts of problems. The earliest bug report is from April, so I think we can use the "revert the offending commit if no fixes are presented within 8 months" rule here. Fixes: 9b58e352b463 ("drm/i915: Enable PSR by default on Haswell and Broadwell.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97602 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97515 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96736 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96704 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96569 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95176 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94985 Cc: Rodrigo Vivi Cc: Jim Bride Signed-off-by: Paulo Zanoni Acked-by: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1481662664-18986-1-git-send-email-paulo.r.zanoni@intel.com (cherry picked from commit 2ee7dc497e348eecbb82adbb1ea9e9a7e29fe921) Signed-off-by: Jani Nikula Signed-off-by: Paulo Zanoni Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_psr.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 108ba1e5d658..9b307cee3008 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -825,13 +825,9 @@ void intel_psr_init(struct drm_device *dev) dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ? HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE; - /* Per platform default */ - if (i915.enable_psr == -1) { - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - i915.enable_psr = 1; - else - i915.enable_psr = 0; - } + /* Per platform default: all disabled. */ + if (i915.enable_psr == -1) + i915.enable_psr = 0; /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev) || IS_BROADWELL(dev)) -- GitLab From f482823e99f045bc5f1bd54626188219dfd1ad46 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 11 Oct 2016 15:25:38 -0300 Subject: [PATCH 0374/1442] drm/i915/gen9: unconditionally apply the memory bandwidth WA commit ee3d532fcb64872bc20be0ee58f7afdb9fa82abe upstream. Mahesh Kumar is already working on a proper implementation for the workaround, but while we still don't have it, let's just unconditionally apply the workaround for everybody and we hope we can close all those numerous bugzilla tickets. Also, I'm not sure how easy it will be to backport the final implementation to the stable Kernels, and this patch here is probably easier to backport. At the present moment I still don't have confirmation that this patch fixes any of the bugs listed below, but we should definitely try testing all of them again. v2: s/intel_needs_memory_bw_wa/skl_needs_memory_bw_wa/ (Lyude). v3: Rebase (dev -> dev_priv change on ilk_wm_max_level). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94337 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94605 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94884 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95010 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96226 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96828 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97450 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97830 Cc: Mahesh Kumar Cc: Lyude Cc: Dhinakaran Pandiyan Signed-off-by: Paulo Zanoni Reviewed-by: Lyude Link: http://patchwork.freedesktop.org/patch/msgid/1476210338-9797-1-git-send-email-paulo.r.zanoni@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 49 +++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index db24f898853c..9b8c154d5704 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2879,6 +2879,21 @@ skl_wm_plane_id(const struct intel_plane *plane) } } +/* + * FIXME: We still don't have the proper code detect if we need to apply the WA, + * so assume we'll always need it in order to avoid underruns. + */ +static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || + IS_KABYLAKE(dev_priv)) + return true; + + return false; +} + static bool intel_has_sagv(struct drm_i915_private *dev_priv) { @@ -2999,9 +3014,10 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_crtc *crtc; + struct intel_crtc *crtc; + struct intel_plane *plane; enum pipe pipe; - int level, plane; + int level, id, latency; if (!intel_has_sagv(dev_priv)) return false; @@ -3019,27 +3035,36 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) /* Since we're now guaranteed to only have one active CRTC... */ pipe = ffs(intel_state->active_crtcs) - 1; - crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE) + if (crtc->base.state->mode.flags & DRM_MODE_FLAG_INTERLACE) return false; - for_each_plane(dev_priv, pipe, plane) { + for_each_intel_plane_on_crtc(dev, crtc, plane) { + id = skl_wm_plane_id(plane); + /* Skip this plane if it's not enabled */ - if (intel_state->wm_results.plane[pipe][plane][0] == 0) + if (intel_state->wm_results.plane[pipe][id][0] == 0) continue; /* Find the highest enabled wm level for this plane */ for (level = ilk_wm_max_level(dev); - intel_state->wm_results.plane[pipe][plane][level] == 0; --level) + intel_state->wm_results.plane[pipe][id][level] == 0; --level) { } + latency = dev_priv->wm.skl_latency[level]; + + if (skl_needs_memory_bw_wa(intel_state) && + plane->base.state->fb->modifier[0] == + I915_FORMAT_MOD_X_TILED) + latency += 15; + /* * If any of the planes on this pipe don't enable wm levels * that incur memory latencies higher then 30µs we can't enable * the SAGV */ - if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME) + if (latency < SKL_SAGV_BLOCK_TIME) return false; } @@ -3549,12 +3574,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t width = 0, height = 0; uint32_t plane_pixel_rate; uint32_t y_tile_minimum, y_min_scanlines; + struct intel_atomic_state *state = + to_intel_atomic_state(cstate->base.state); + bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { *enabled = false; return 0; } + if (apply_memory_bw_wa && fb->modifier[0] == I915_FORMAT_MOD_X_TILED) + latency += 15; + width = drm_rect_width(&intel_pstate->base.src) >> 16; height = drm_rect_height(&intel_pstate->base.src) >> 16; @@ -3606,6 +3637,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, plane_blocks_per_line); y_tile_minimum = plane_blocks_per_line * y_min_scanlines; + if (apply_memory_bw_wa) + y_tile_minimum *= 2; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { -- GitLab From 4ffac6f06dfa77ead74d448d07079e37339e4da0 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 8 Nov 2016 18:22:11 -0200 Subject: [PATCH 0375/1442] drm/i915/gen9: fix the WM memory bandwidth WA for Y tiling cases commit 2ef32dee97fcf41987722a37eb6ff1a983915e99 upstream. The previous spec version said "double Ytile planes minimum lines", and I interpreted this as referring to what the spec calls "Y tile minimum", but in fact it was referring to what the spec calls "Minimum Scanlines for Y tile". I noticed that Mahesh Kumar had a different interpretation, so I sent and email to the spec authors and got clarification on the correct meaning. Also, BSpec was updated and should be clear now. Fixes: ee3d532fcb64 ("drm/i915/gen9: unconditionally apply the memory bandwidth WA") Cc: stable@vger.kernel.org Cc: Mahesh Kumar Signed-off-by: Paulo Zanoni Reviewed-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/1478636531-6081-1-git-send-email-paulo.r.zanoni@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9b8c154d5704..103cefdb9ddd 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3617,6 +3617,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, y_min_scanlines = 4; } + if (apply_memory_bw_wa) + y_min_scanlines *= 2; + plane_bytes_per_line = width * cpp; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { @@ -3637,8 +3640,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, plane_blocks_per_line); y_tile_minimum = plane_blocks_per_line * y_min_scanlines; - if (apply_memory_bw_wa) - y_tile_minimum *= 2; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { -- GitLab From 56d22b9125421217fdea41250cce9dfa0013b483 Mon Sep 17 00:00:00 2001 From: Josh Zimmerman Date: Thu, 27 Oct 2016 14:50:09 -0700 Subject: [PATCH 0376/1442] tpm_tis: Check return values from get_burstcount. commit 26a137e31ffe6fbfdb008554a8d9b3d55bd5c86e upstream. If the TPM we're connecting to uses a static burst count, it will report a burst count of zero throughout the response read. However, get_burstcount assumes that a response of zero indicates that the TPM is not ready to receive more data. In this case, it returns a negative error code, which is passed on to tpm_tis_{write,read}_bytes as a u16, causing them to read/write far too many bytes. This patch checks for negative return codes and bails out from recv_data and tpm_tis_send_data. Fixes: 1107d065fdf1 (tpm_tis: Introduce intermediate layer for TPM access) Signed-off-by: Josh Zimmerman Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index e3bf31b37138..a1ce0607bf7b 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -185,7 +185,12 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) TPM_STS_DATA_AVAIL | TPM_STS_VALID, chip->timeout_c, &priv->read_queue, true) == 0) { - burstcnt = min_t(int, get_burstcount(chip), count - size); + burstcnt = get_burstcount(chip); + if (burstcnt < 0) { + dev_err(&chip->dev, "Unable to read burstcount\n"); + return burstcnt; + } + burstcnt = min_t(int, burstcnt, count - size); rc = tpm_tis_read_bytes(priv, TPM_DATA_FIFO(priv->locality), burstcnt, buf + size); @@ -271,7 +276,13 @@ static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len) } while (count < len - 1) { - burstcnt = min_t(int, get_burstcount(chip), len - count - 1); + burstcnt = get_burstcount(chip); + if (burstcnt < 0) { + dev_err(&chip->dev, "Unable to read burstcount\n"); + rc = burstcnt; + goto out_err; + } + burstcnt = min_t(int, burstcnt, len - count - 1); rc = tpm_tis_write_bytes(priv, TPM_DATA_FIFO(priv->locality), burstcnt, buf + count); if (rc < 0) -- GitLab From ae8b6cb40cb29dfa760641ce55d5b9c0c153a1f2 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 9 Jan 2017 16:38:32 +0100 Subject: [PATCH 0377/1442] xfs: don't call xfs_sb_quota_from_disk twice commit e6fc6fcf4447c9266038c55c25e4c7c14bee110c upstream. Source xfsprogs commit: ee3754254e8c186c99b6cdd4d59f741759d04acb Kernel commit 5ef828c4 ("xfs: avoid false quotacheck after unclean shutdown") made xfs_sb_from_disk() also call xfs_sb_quota_from_disk by default. However, when this was merged to libxfs, existing separate calls to libxfs_sb_quota_from_disk remained, and calling it twice in a row on a V4 superblock leads to issues, because: if (sbp->sb_qflags & XFS_PQUOTA_ACCT) { ... sbp->sb_pquotino = sbp->sb_gquotino; sbp->sb_gquotino = NULLFSINO; and after the second call, we have set both pquotino and gquotino to NULLFSINO. Fix this by making it safe to call twice, and also remove the extra calls to libxfs_sb_quota_from_disk. This is only spotted when running xfstests with "-m crc=0" because the sb_from_disk change came about after V5 became default, and the above behavior only exists on a V4 superblock. Reported-by: Eryu Guan Signed-off-by: Eric Sandeen Reviewed-by: Carlos Maiolino Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_sb.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a70aec910626..7a392402f2f8 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -338,13 +338,16 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); - if (sbp->sb_qflags & XFS_PQUOTA_ACCT) { + if (sbp->sb_qflags & XFS_PQUOTA_ACCT && + sbp->sb_gquotino != NULLFSINO) { /* * In older version of superblock, on-disk superblock only * has sb_gquotino, and in-core superblock has both sb_gquotino * and sb_pquotino. But, only one of them is supported at any * point of time. So, if PQUOTA is set in disk superblock, - * copy over sb_gquotino to sb_pquotino. + * copy over sb_gquotino to sb_pquotino. The NULLFSINO test + * above is to make sure we don't do this twice and wipe them + * both out! */ sbp->sb_pquotino = sbp->sb_gquotino; sbp->sb_gquotino = NULLFSINO; -- GitLab From a11f90ca5f306a1c38db4f24db8bff97ffb87bd2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:33 +0100 Subject: [PATCH 0378/1442] xfs: check return value of _trans_reserve_quota_nblks commit 4fd29ec47212c8cbf98916af519019ccc5e58e49 upstream. Check the return value of xfs_trans_reserve_quota_nblks for errors. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c6eb21940783..71dd6d783710 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4902,8 +4902,11 @@ xfs_bmap_del_extent_delay( * sb counters as we might have to borrow some blocks for the * indirect block accounting. */ - xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0, + error = xfs_trans_reserve_quota_nblks(NULL, ip, + -((long)del->br_blockcount), 0, isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + if (error) + return error; ip->i_delayed_blks -= del->br_blockcount; if (whichfork == XFS_COW_FORK) -- GitLab From 2f092422e1ce1f155c6a9da147a78759f0e04f40 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 9 Jan 2017 16:38:34 +0100 Subject: [PATCH 0379/1442] xfs: don't skip cow forks w/ delalloc blocks in cowblocks scan commit 399372349a7f9b2d7e56e4fa4467c69822d07024 upstream. The cowblocks background scanner currently clears the cowblocks tag for inodes without any real allocations in the cow fork. This excludes inodes with only delalloc blocks in the cow fork. While we might never expect to clear delalloc blocks from the cow fork in the background scanner, it is not necessarily correct to clear the cowblocks tag from such inodes. For example, if the background scanner happens to process an inode between a buffered write and writeback, the scanner catches the inode in a state after delalloc blocks have been allocated to the cow fork but before the delalloc blocks have been converted to real blocks by writeback. The background scanner then incorrectly clears the cowblocks tag, even if part of the aforementioned delalloc reservation will not be remapped to the data fork (i.e., extra blocks due to the cowextsize hint). This means that any such additional blocks in the cow fork might never be reclaimed by the background scanner and could persist until the inode itself is reclaimed. To address this problem, only skip and clear inodes without any cow fork allocations whatsoever from the background scanner. While we generally do not want to cancel delalloc reservations from the background scanner, the pagecache dirty check following the cowblocks check should prevent that situation. If we do end up with delalloc cow fork blocks without a dirty address space mapping, this is probably an indication that something has gone wrong and the blocks should be reclaimed, as they may never be converted to a real allocation. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 7 ++++++- fs/xfs/xfs_reflink.c | 34 ---------------------------------- fs/xfs/xfs_reflink.h | 2 -- 3 files changed, 6 insertions(+), 37 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f295049db681..1b4861f5d3d8 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1580,10 +1580,15 @@ xfs_inode_free_cowblocks( struct xfs_eofblocks *eofb = args; bool need_iolock = true; int match; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); - if (!xfs_reflink_has_real_cow_blocks(ip)) { + /* + * Just clear the tag if we have an empty cow fork or none at all. It's + * possible the inode was fully unshared since it was originally tagged. + */ + if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); return 0; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a279b4e7f5fe..c06904893202 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1697,37 +1697,3 @@ xfs_reflink_unshare( trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; } - -/* - * Does this inode have any real CoW reservations? - */ -bool -xfs_reflink_has_real_cow_blocks( - struct xfs_inode *ip) -{ - struct xfs_bmbt_irec irec; - struct xfs_ifork *ifp; - struct xfs_bmbt_rec_host *gotp; - xfs_extnum_t idx; - - if (!xfs_is_reflink_inode(ip)) - return false; - - /* Go find the old extent in the CoW fork. */ - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - gotp = xfs_iext_bno_to_ext(ifp, 0, &idx); - while (gotp) { - xfs_bmbt_get_all(gotp, &irec); - - if (!isnullstartblock(irec.br_startblock)) - return true; - - /* Roll on... */ - idx++; - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - break; - gotp = xfs_iext_get_ext(ifp, idx); - } - - return false; -} diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index fad11607c9ad..97ea9b487884 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -50,6 +50,4 @@ extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); -extern bool xfs_reflink_has_real_cow_blocks(struct xfs_inode *ip); - #endif /* __XFS_REFLINK_H */ -- GitLab From 3978c5bb004312fd267aed7279fe64b119e126b0 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 9 Jan 2017 16:38:35 +0100 Subject: [PATCH 0380/1442] xfs: don't BUG() on mixed direct and mapped I/O commit 04197b341f23b908193308b8d63d17ff23232598 upstream. We've had reports of generic/095 causing XFS to BUG() in __xfs_get_blocks() due to the existence of delalloc blocks on a direct I/O read. generic/095 issues a mix of various types of I/O, including direct and memory mapped I/O to a single file. This is clearly not supported behavior and is known to lead to such problems. E.g., the lack of exclusion between the direct I/O and write fault paths means that a write fault can allocate delalloc blocks in a region of a file that was previously a hole after the direct read has attempted to flush/inval the file range, but before it actually reads the block mapping. In turn, the direct read discovers a delalloc extent and cannot proceed. While the appropriate solution here is to not mix direct and memory mapped I/O to the same regions of the same file, the current BUG_ON() behavior is probably overkill as it can crash the entire system. Instead, localize the failure to the I/O in question by returning an error for a direct I/O that cannot be handled safely due to delalloc blocks. Be careful to allow the case of a direct write to post-eof delalloc blocks. This can occur due to speculative preallocation and is safe as post-eof blocks are not accompanied by dirty pages in pagecache (conversely, preallocation within eof must have been zeroed, and thus dirtied, before the inode size could have been increased beyond said blocks). Finally, provide an additional warning if a direct I/O write occurs while the file is memory mapped. This may not catch all problematic scenarios, but provides a hint that some known-to-be-problematic I/O methods are in use. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3e57a56cf829..2693ba84ec25 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1361,6 +1361,26 @@ __xfs_get_blocks( if (error) goto out_unlock; + /* + * The only time we can ever safely find delalloc blocks on direct I/O + * is a dio write to post-eof speculative preallocation. All other + * scenarios are indicative of a problem or misuse (such as mixing + * direct and mapped I/O). + * + * The file may be unmapped by the time we get here so we cannot + * reliably fail the I/O based on mapping. Instead, fail the I/O if this + * is a read or a write within eof. Otherwise, carry on but warn as a + * precuation if the file happens to be mapped. + */ + if (direct && imap.br_startblock == DELAYSTARTBLOCK) { + if (!create || offset < i_size_read(VFS_I(ip))) { + WARN_ON_ONCE(1); + error = -EIO; + goto out_unlock; + } + WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping)); + } + /* for DAX, we convert unwritten extents directly */ if (create && (!nimaps || @@ -1450,8 +1470,6 @@ __xfs_get_blocks( (new || ISUNWRITTEN(&imap)))) set_buffer_new(bh_result); - BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK); - return 0; out_unlock: -- GitLab From f380ee72a7a498c934afe3aa67a50242b935f540 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 9 Jan 2017 16:38:36 +0100 Subject: [PATCH 0381/1442] xfs: provide helper for counting extents from if_bytes commit 5d829300bee000980a09ac2ccb761cb25867b67c upstream. The open-coded pattern: ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) is all over the xfs code; provide a new helper xfs_iext_count(ifp) to count the number of inline extents in an inode fork. [dchinner: pick up several missed conversions] Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 49 ++++++++++++++++------------------ fs/xfs/libxfs/xfs_inode_fork.c | 31 ++++++++++++--------- fs/xfs/libxfs/xfs_inode_fork.h | 1 + fs/xfs/xfs_bmap_util.c | 34 ++++++++++------------- fs/xfs/xfs_inode_item.c | 4 +-- fs/xfs/xfs_ioctl.c | 6 ++--- fs/xfs/xfs_qm.c | 2 +- fs/xfs/xfs_reflink.c | 4 +-- 8 files changed, 64 insertions(+), 67 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 71dd6d783710..5c3c4dd14735 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -515,7 +515,7 @@ xfs_bmap_trace_exlist( state |= BMAP_ATTRFORK; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(cnt == xfs_iext_count(ifp)); for (idx = 0; idx < cnt; idx++) trace_xfs_extlist(ip, idx, whichfork, caller_ip); } @@ -811,7 +811,7 @@ xfs_bmap_extents_to_btree( XFS_BTREE_LONG_PTRS); arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (cnt = i = 0; i < nextents; i++) { ep = xfs_iext_get_ext(ifp, i); if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) { @@ -1296,7 +1296,7 @@ xfs_bmap_read_extents( /* * Here with bp and block set to the leftmost leaf node in the tree. */ - room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + room = xfs_iext_count(ifp); i = 0; /* * Loop over all leaf nodes. Copy information to the extent records. @@ -1361,7 +1361,7 @@ xfs_bmap_read_extents( return error; block = XFS_BUF_TO_BLOCK(bp); } - ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT(i == xfs_iext_count(ifp)); ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); return 0; @@ -1404,7 +1404,7 @@ xfs_bmap_search_multi_extents( if (lastx > 0) { xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp); } - if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) { + if (lastx < xfs_iext_count(ifp)) { xfs_bmbt_get_all(ep, gotp); *eofp = 0; } else { @@ -1497,7 +1497,7 @@ xfs_bmap_first_unused( (error = xfs_iread_extents(tp, ip, whichfork))) return error; lowest = *first_unused; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); off = xfs_bmbt_get_startoff(ep); @@ -1582,7 +1582,7 @@ xfs_bmap_last_extent( return error; } - nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { *is_empty = 1; return 0; @@ -1735,7 +1735,7 @@ xfs_bmap_add_extent_delay_real( &bma->ip->i_d.di_nextents); ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); @@ -1794,7 +1794,7 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (bma->idx < xfs_iext_count(ifp) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); @@ -2300,7 +2300,7 @@ xfs_bmap_add_extent_unwritten_real( ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); ASSERT(*idx >= 0); - ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2356,7 +2356,7 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < xfs_iext_count(&ip->i_df) - 1) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) @@ -2836,7 +2836,7 @@ xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. */ - if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); @@ -2966,7 +2966,7 @@ xfs_bmap_add_extent_hole_real( ifp = XFS_IFORK_PTR(bma->ip, whichfork); ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(bma->idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); @@ -2992,7 +2992,7 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (bma->idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); if (isnullstartblock(right.br_startblock)) @@ -4221,7 +4221,7 @@ xfs_bmapi_read( break; /* Else go on to the next record. */ - if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + if (++lastx < xfs_iext_count(ifp)) xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); else eof = 1; @@ -4733,7 +4733,7 @@ xfs_bmapi_write( /* Else go on to the next record. */ bma.prev = bma.got; - if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) { + if (++bma.idx < xfs_iext_count(ifp)) { xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx), &bma.got); } else @@ -4885,7 +4885,7 @@ xfs_bmap_del_extent_delay( da_new = 0; ASSERT(*idx >= 0); - ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(del->br_blockcount > 0); ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); @@ -5016,7 +5016,7 @@ xfs_bmap_del_extent_cow( got_endoff = got->br_startoff + got->br_blockcount; ASSERT(*idx >= 0); - ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(del->br_blockcount > 0); ASSERT(got->br_startoff <= del->br_startoff); ASSERT(got_endoff >= del_endoff); @@ -5122,8 +5122,7 @@ xfs_bmap_del_extent( state |= BMAP_COWFORK; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); + ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp))); ASSERT(del->br_blockcount > 0); ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &got); @@ -5448,7 +5447,6 @@ __xfs_bunmapi( int logflags; /* transaction logging flags */ xfs_extlen_t mod; /* rt extent offset */ xfs_mount_t *mp; /* mount structure */ - xfs_extnum_t nextents; /* number of file extents */ xfs_bmbt_irec_t prev; /* previous extent record */ xfs_fileoff_t start; /* first file offset deleted */ int tmp_logflags; /* partial logging flags */ @@ -5480,8 +5478,7 @@ __xfs_bunmapi( if (!(ifp->if_flags & XFS_IFEXTENTS) && (error = xfs_iread_extents(tp, ip, whichfork))) return error; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents == 0) { + if (xfs_iext_count(ifp) == 0) { *rlen = 0; return 0; } @@ -5966,7 +5963,7 @@ xfs_bmse_shift_one( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); xfs_bmbt_get_all(gotp, &got); @@ -6143,7 +6140,7 @@ xfs_bmap_shift_extents( * are collapsing out, so we cannot use the count of real extents here. * Instead we have to calculate it from the incore fork. */ - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); if (total_extents == 0) { *done = 1; goto del_cursor; @@ -6203,7 +6200,7 @@ xfs_bmap_shift_extents( * count can change. Update the total and grade the next record. */ if (direction == SHIFT_LEFT) { - total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + total_extents = xfs_iext_count(ifp); stop_extent = total_extents; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 5dd56d3dbb3a..5fbe24c31679 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -775,6 +775,13 @@ xfs_idestroy_fork( } } +/* Count number of incore extents based on if_bytes */ +xfs_extnum_t +xfs_iext_count(struct xfs_ifork *ifp) +{ + return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); +} + /* * Convert in-core extents to on-disk form * @@ -803,7 +810,7 @@ xfs_iextents_copy( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(ifp->if_bytes > 0); - nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nrecs = xfs_iext_count(ifp); XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); ASSERT(nrecs > 0); @@ -941,7 +948,7 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); - ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + ASSERT(idx < xfs_iext_count(ifp)); if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { return ifp->if_u1.if_ext_irec->er_extbuf; @@ -1017,7 +1024,7 @@ xfs_iext_add( int new_size; /* size of extents after adding */ xfs_extnum_t nextents; /* number of extents in file */ - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT((idx >= 0) && (idx <= nextents)); byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); new_size = ifp->if_bytes + byte_diff; @@ -1241,7 +1248,7 @@ xfs_iext_remove( trace_xfs_iext_remove(ip, idx, state, _RET_IP_); ASSERT(ext_diff > 0); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); if (new_size == 0) { @@ -1270,7 +1277,7 @@ xfs_iext_remove_inline( ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); ASSERT(idx < XFS_INLINE_EXTS); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(((nextents - ext_diff) > 0) && (nextents - ext_diff) < XFS_INLINE_EXTS); @@ -1309,7 +1316,7 @@ xfs_iext_remove_direct( ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); new_size = ifp->if_bytes - (ext_diff * sizeof(xfs_bmbt_rec_t)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (new_size == 0) { xfs_iext_destroy(ifp); @@ -1546,7 +1553,7 @@ xfs_iext_indirect_to_direct( int size; /* size of file extents */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(nextents <= XFS_LINEAR_EXTS); size = nextents * sizeof(xfs_bmbt_rec_t); @@ -1620,7 +1627,7 @@ xfs_iext_bno_to_ext( xfs_extnum_t nextents; /* number of file extents */ xfs_fileoff_t startoff = 0; /* start offset of extent */ - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { *idxp = 0; return NULL; @@ -1733,8 +1740,8 @@ xfs_iext_idx_to_irec( ASSERT(ifp->if_flags & XFS_IFEXTIREC); ASSERT(page_idx >= 0); - ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); - ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); + ASSERT(page_idx <= xfs_iext_count(ifp)); + ASSERT(page_idx < xfs_iext_count(ifp) || realloc); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; erp_idx = 0; @@ -1782,7 +1789,7 @@ xfs_iext_irec_init( xfs_extnum_t nextents; /* number of extents in file */ ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); ASSERT(nextents <= XFS_LINEAR_EXTS); erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); @@ -1906,7 +1913,7 @@ xfs_iext_irec_compact( ASSERT(ifp->if_flags & XFS_IFEXTIREC); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); if (nextents == 0) { xfs_iext_destroy(ifp); diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index c9476f50e32d..8bf112e29aa1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -152,6 +152,7 @@ void xfs_init_local_fork(struct xfs_inode *, int, const void *, int); struct xfs_bmbt_rec_host * xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); +xfs_extnum_t xfs_iext_count(struct xfs_ifork *); void xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t, struct xfs_bmbt_irec *, int); void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 47074e0c33f3..0670a8bd5818 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -359,9 +359,7 @@ xfs_bmap_count_blocks( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { - xfs_bmap_count_leaves(ifp, 0, - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), - count); + xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count); return 0; } @@ -426,7 +424,7 @@ xfs_getbmapx_fix_eof_hole( ifp = XFS_IFORK_PTR(ip, whichfork); if (!moretocome && xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && - (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) + (lastx == xfs_iext_count(ifp) - 1)) out->bmv_oflags |= BMV_OF_LAST; } @@ -1878,15 +1876,13 @@ xfs_swap_extent_forks( switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - /* If the extents fit in the inode, fix the - * pointer. Otherwise it's already NULL or - * pointing to the extent. + /* + * If the extents fit in the inode, fix the pointer. Otherwise + * it's already NULL or pointing to the extent. */ - nextents = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents <= XFS_INLINE_EXTS) { - ifp->if_u1.if_extents = - ifp->if_u2.if_inline_ext; - } + nextents = xfs_iext_count(&ip->i_df); + if (nextents <= XFS_INLINE_EXTS) + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; (*src_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: @@ -1898,15 +1894,13 @@ xfs_swap_extent_forks( switch (tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: - /* If the extents fit in the inode, fix the - * pointer. Otherwise it's already NULL or - * pointing to the extent. + /* + * If the extents fit in the inode, fix the pointer. Otherwise + * it's already NULL or pointing to the extent. */ - nextents = tip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - if (nextents <= XFS_INLINE_EXTS) { - tifp->if_u1.if_extents = - tifp->if_u2.if_inline_ext; - } + nextents = xfs_iext_count(&tip->i_df); + if (nextents <= XFS_INLINE_EXTS) + tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; (*target_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 9610e9c00952..d90e7811ccdd 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -164,7 +164,7 @@ xfs_inode_item_format_data_fork( struct xfs_bmbt_rec *p; ASSERT(ip->i_df.if_u1.if_extents != NULL); - ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); + ASSERT(xfs_iext_count(&ip->i_df) > 0); p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); @@ -261,7 +261,7 @@ xfs_inode_item_format_attr_fork( ip->i_afp->if_bytes > 0) { struct xfs_bmbt_rec *p; - ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == + ASSERT(xfs_iext_count(ip->i_afp) == ip->i_d.di_anextents); ASSERT(ip->i_afp->if_u1.if_extents != NULL); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c245bed3249b..a39197501a7c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -910,16 +910,14 @@ xfs_ioc_fsgetxattr( if (attr) { if (ip->i_afp) { if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_afp->if_bytes / - sizeof(xfs_bmbt_rec_t); + fa.fsx_nextents = xfs_iext_count(ip->i_afp); else fa.fsx_nextents = ip->i_d.di_anextents; } else fa.fsx_nextents = 0; } else { if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_df.if_bytes / - sizeof(xfs_bmbt_rec_t); + fa.fsx_nextents = xfs_iext_count(&ip->i_df); else fa.fsx_nextents = ip->i_d.di_nextents; } diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index a60d9e2739d1..45e50ea90769 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1135,7 +1135,7 @@ xfs_qm_get_rtblks( return error; } rtblks = 0; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + nextents = xfs_iext_count(ifp); for (idx = 0; idx < nextents; idx++) rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); *O_rtblks = (xfs_qcnt_t)rtblks; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index c06904893202..0edf835af32d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -486,7 +486,7 @@ xfs_reflink_trim_irec_to_next_cow( /* This is the extent before; try sliding up one. */ if (irec.br_startoff < offset_fsb) { idx++; - if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) + if (idx >= xfs_iext_count(ifp)) return 0; gotp = xfs_iext_get_ext(ifp, idx); xfs_bmbt_get_all(gotp, &irec); @@ -566,7 +566,7 @@ xfs_reflink_cancel_cow_blocks( xfs_bmap_del_extent_cow(ip, &idx, &got, &del); } - if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)) + if (++idx >= xfs_iext_count(ifp)) break; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); } -- GitLab From 63fa793e757dfc08c09865d68936ce3d67a00047 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:37 +0100 Subject: [PATCH 0382/1442] xfs: check minimum block size for CRC filesystems commit bec9d48d7a303a5bb95c05961ff07ec7eeb59058 upstream. Check the minimum block size on v5 filesystems. [dchinner: cleaned up XFS_MIN_CRC_BLOCKSIZE check] Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_sb.c | 6 ++++++ fs/xfs/libxfs/xfs_types.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 7a392402f2f8..2580262e4ea0 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -262,6 +262,12 @@ xfs_mount_validate_sb( return -EFSCORRUPTED; } + if (xfs_sb_version_hascrc(&mp->m_sb) && + sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { + xfs_notice(mp, "v5 SB sanity check failed"); + return -EFSCORRUPTED; + } + /* * Until this is fixed only page-sized or smaller data blocks work. */ diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 8d74870468c2..cf044c0f4d41 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -75,11 +75,14 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ * Minimum and maximum blocksize and sectorsize. * The blocksize upper limit is pretty much arbitrary. * The sectorsize upper limit is due to sizeof(sb_sectsize). + * CRC enable filesystems use 512 byte inodes, meaning 512 byte block sizes + * cannot be used. */ #define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ #define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */ #define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) #define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) +#define XFS_MIN_CRC_BLOCKSIZE (1 << (XFS_MIN_BLOCKSIZE_LOG + 1)) #define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */ #define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */ #define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG) -- GitLab From b49ef758f6003a7ed0afb37b33fc96e238752497 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 9 Jan 2017 16:38:38 +0100 Subject: [PATCH 0383/1442] xfs: fix unbalanced inode reclaim flush locking commit 98efe8af1c9ffac47e842b7a75ded903e2f028da upstream. Filesystem shutdown testing on an older distro kernel has uncovered an imbalanced locking pattern for the inode flush lock in xfs_reclaim_inode(). Specifically, there is a double unlock sequence between the call to xfs_iflush_abort() and xfs_reclaim_inode() at the "reclaim:" label. This actually does not cause obvious problems on current kernels due to the current flush lock implementation. Older kernels use a counting based flush lock mechanism, however, which effectively breaks the lock indefinitely when an already unlocked flush lock is repeatedly unlocked. Though this only currently occurs on filesystem shutdown, it has reproduced the effect of elevating an fs shutdown to a system-wide crash or hang. As it turns out, the flush lock is not actually required for the reclaim logic in xfs_reclaim_inode() because by that time we have already cycled the flush lock once while holding ILOCK_EXCL. Therefore, remove the additional flush lock/unlock cycle around the 'reclaim:' label and update branches into this label to release the flush lock where appropriate. Add an assert to xfs_ifunlock() to help prevent future occurences of the same problem. Reported-by: Zorro Lang Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 27 ++++++++++++++------------- fs/xfs/xfs_inode.h | 11 ++++++----- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 1b4861f5d3d8..9c3e5c6ddf20 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -123,7 +123,6 @@ __xfs_inode_free( { /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!xfs_isiflocked(ip)); XFS_STATS_DEC(ip->i_mount, vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); @@ -133,6 +132,8 @@ void xfs_inode_free( struct xfs_inode *ip) { + ASSERT(!xfs_isiflocked(ip)); + /* * Because we use RCU freeing we need to ensure the inode always * appears to be reclaimed with an invalid inode number when in the @@ -981,6 +982,7 @@ xfs_reclaim_inode( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_iunpin_wait(ip); + /* xfs_iflush_abort() drops the flush lock */ xfs_iflush_abort(ip, false); goto reclaim; } @@ -989,10 +991,10 @@ xfs_reclaim_inode( goto out_ifunlock; xfs_iunpin_wait(ip); } - if (xfs_iflags_test(ip, XFS_ISTALE)) - goto reclaim; - if (xfs_inode_clean(ip)) + if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) { + xfs_ifunlock(ip); goto reclaim; + } /* * Never flush out dirty data during non-blocking reclaim, as it would @@ -1030,25 +1032,24 @@ xfs_reclaim_inode( xfs_buf_relse(bp); } - xfs_iflock(ip); reclaim: + ASSERT(!xfs_isiflocked(ip)); + /* * Because we use RCU freeing we need to ensure the inode always appears * to be reclaimed with an invalid inode number when in the free state. - * We do this as early as possible under the ILOCK and flush lock so - * that xfs_iflush_cluster() can be guaranteed to detect races with us - * here. By doing this, we guarantee that once xfs_iflush_cluster has - * locked both the XFS_ILOCK and the flush lock that it will see either - * a valid, flushable inode that will serialise correctly against the - * locks below, or it will see a clean (and invalid) inode that it can - * skip. + * We do this as early as possible under the ILOCK so that + * xfs_iflush_cluster() can be guaranteed to detect races with us here. + * By doing this, we guarantee that once xfs_iflush_cluster has locked + * XFS_ILOCK that it will see either a valid, flushable inode that will + * serialise correctly, or it will see a clean (and invalid) inode that + * it can skip. */ spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); - xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f14c1de2549d..71e8a81c91a3 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -246,6 +246,11 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) * Synchronize processes attempting to flush the in-core inode back to disk. */ +static inline int xfs_isiflocked(struct xfs_inode *ip) +{ + return xfs_iflags_test(ip, XFS_IFLOCK); +} + extern void __xfs_iflock(struct xfs_inode *ip); static inline int xfs_iflock_nowait(struct xfs_inode *ip) @@ -261,16 +266,12 @@ static inline void xfs_iflock(struct xfs_inode *ip) static inline void xfs_ifunlock(struct xfs_inode *ip) { + ASSERT(xfs_isiflocked(ip)); xfs_iflags_clear(ip, XFS_IFLOCK); smp_mb(); wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); } -static inline int xfs_isiflocked(struct xfs_inode *ip) -{ - return xfs_iflags_test(ip, XFS_IFLOCK); -} - /* * Flags for inode locking. * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) -- GitLab From 49dc19915d3b0893a698a2d78f732beaff119a4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 16:38:39 +0100 Subject: [PATCH 0384/1442] xfs: new inode extent list lookup helpers commit 93533c7855c3c78c8a900cac65c8d669bb14935d upstream. xfs_iext_lookup_extent looks up a single extent at the passed in offset, and returns the extent covering the area, or the one behind it in case of a hole, as well as the index of the returned extent in arguments, as well as a simple bool as return value that is set to false if no extent could be found because the offset is behind EOF. It is a simpler replacement for xfs_bmap_search_extent that leaves looking up the rarely needed previous extent to the caller and has a nicer calling convention. xfs_iext_get_extent is a helper for iterating over the extent list, it takes an extent index as input, and returns the extent at that index in it's expanded form in an argument if it exists. The actual return value is a bool whether the index is valid or not. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_inode_fork.c | 46 ++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_inode_fork.h | 6 +++++ 2 files changed, 52 insertions(+) diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 5fbe24c31679..222e103356c6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -2003,3 +2003,49 @@ xfs_ifork_init_cow( ip->i_cformat = XFS_DINODE_FMT_EXTENTS; ip->i_cnextents = 0; } + +/* + * Lookup the extent covering bno. + * + * If there is an extent covering bno return the extent index, and store the + * expanded extent structure in *gotp, and the extent index in *idx. + * If there is no extent covering bno, but there is an extent after it (e.g. + * it lies in a hole) return that extent in *gotp and its index in *idx + * instead. + * If bno is beyond the last extent return false, and return the index after + * the last valid index in *idxp. + */ +bool +xfs_iext_lookup_extent( + struct xfs_inode *ip, + struct xfs_ifork *ifp, + xfs_fileoff_t bno, + xfs_extnum_t *idxp, + struct xfs_bmbt_irec *gotp) +{ + struct xfs_bmbt_rec_host *ep; + + XFS_STATS_INC(ip->i_mount, xs_look_exlist); + + ep = xfs_iext_bno_to_ext(ifp, bno, idxp); + if (!ep) + return false; + xfs_bmbt_get_all(ep, gotp); + return true; +} + +/* + * Return true if there is an extent at index idx, and return the expanded + * extent structure at idx in that case. Else return false. + */ +bool +xfs_iext_get_extent( + struct xfs_ifork *ifp, + xfs_extnum_t idx, + struct xfs_bmbt_irec *gotp) +{ + if (idx < 0 || idx >= xfs_iext_count(ifp)) + return false; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp); + return true; +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 8bf112e29aa1..7fb8365326d1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -182,6 +182,12 @@ void xfs_iext_irec_compact_pages(struct xfs_ifork *); void xfs_iext_irec_compact_full(struct xfs_ifork *); void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); +bool xfs_iext_lookup_extent(struct xfs_inode *ip, + struct xfs_ifork *ifp, xfs_fileoff_t bno, + xfs_extnum_t *idxp, struct xfs_bmbt_irec *gotp); +bool xfs_iext_get_extent(struct xfs_ifork *ifp, xfs_extnum_t idx, + struct xfs_bmbt_irec *gotp); + extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); -- GitLab From 2b7dae91a1341302077798ddc38e8bfd8a012cf1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:40 +0100 Subject: [PATCH 0385/1442] xfs: factor rmap btree size into the indlen calculations commit fd26a88093bab6529ea2de819114ca92dbd1d71d upstream. When we're estimating the amount of space it's going to take to satisfy a delalloc reservation, we need to include the space that we might need to grow the rmapbt. This helps us to avoid running out of space later when _iomap_write_allocate needs more space than we reserved. Eryu Guan observed this happening on generic/224 when sunit/swidth were set. Reported-by: Eryu Guan Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 5c3c4dd14735..00188f559c8d 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -49,6 +49,7 @@ #include "xfs_rmap.h" #include "xfs_ag_resv.h" #include "xfs_refcount.h" +#include "xfs_rmap_btree.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -190,8 +191,12 @@ xfs_bmap_worst_indlen( int maxrecs; /* maximum record count at this level */ xfs_mount_t *mp; /* mount structure */ xfs_filblks_t rval; /* return value */ + xfs_filblks_t orig_len; mp = ip->i_mount; + + /* Calculate the worst-case size of the bmbt. */ + orig_len = len; maxrecs = mp->m_bmap_dmxr[0]; for (level = 0, rval = 0; level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); @@ -199,12 +204,20 @@ xfs_bmap_worst_indlen( len += maxrecs - 1; do_div(len, maxrecs); rval += len; - if (len == 1) - return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + if (len == 1) { + rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - level - 1; + break; + } if (level == 0) maxrecs = mp->m_bmap_dmxr[1]; } + + /* Calculate the worst-case size of the rmapbt. */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + + mp->m_rmap_maxlevels; + return rval; } -- GitLab From 3903257660338c41c22a2464407c8ff93c42e00b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:41 +0100 Subject: [PATCH 0386/1442] xfs: always succeed when deduping zero bytes commit fba3e594ef0ad911fa8f559732d588172f212d71 upstream. It turns out that btrfs and xfs had differing interpretations of what to do when the dedupe length is zero. Change xfs to follow btrfs' semantics so that the userland interface is consistent. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_reflink.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 0edf835af32d..4d99100a4bbf 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1345,8 +1345,14 @@ xfs_reflink_remap_range( goto out_unlock; } - if (len == 0) + /* Zero length dedupe exits immediately; reflink goes to EOF. */ + if (len == 0) { + if (is_dedupe) { + ret = 0; + goto out_unlock; + } len = isize - pos_in; + } /* Ensure offsets don't wrap and the input is inside i_size */ if (pos_in + len < pos_in || pos_out + len < pos_out || -- GitLab From cf4fb510473b4529a0969b1d0af898b7a844bc36 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 16:38:42 +0100 Subject: [PATCH 0387/1442] xfs: remove prev argument to xfs_bmapi_reserve_delalloc commit 65c5f419788d623a0410eca1866134f5e4628594 upstream. We can easily lookup the previous extent for the cases where we need it, which saves the callers from looking it up for us later in the series. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 8 ++++++-- fs/xfs/libxfs/xfs_bmap.h | 3 +-- fs/xfs/xfs_iomap.c | 3 +-- fs/xfs/xfs_reflink.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 00188f559c8d..09b5d4f07636 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4250,7 +4250,6 @@ xfs_bmapi_reserve_delalloc( xfs_fileoff_t aoff, xfs_filblks_t len, struct xfs_bmbt_irec *got, - struct xfs_bmbt_irec *prev, xfs_extnum_t *lastx, int eof) { @@ -4272,7 +4271,12 @@ xfs_bmapi_reserve_delalloc( else extsz = xfs_get_extsz_hint(ip); if (extsz) { - error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, + struct xfs_bmbt_irec prev; + + if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev)) + prev.br_startoff = NULLFILEOFF; + + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, 1, 0, &aoff, &alen); ASSERT(!error); } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 7cae6ec27fa6..e3c2b5adf505 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -243,8 +243,7 @@ struct xfs_bmbt_rec_host * struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, xfs_fileoff_t aoff, xfs_filblks_t len, - struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *prev, - xfs_extnum_t *lastx, int eof); + struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 436e109bb01e..59ffcac8a47d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -622,8 +622,7 @@ xfs_file_iomap_begin_delay( retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, - end_fsb - offset_fsb, &got, - &prev, &idx, eof); + end_fsb - offset_fsb, &got, &idx, eof); switch (error) { case 0: break; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 4d99100a4bbf..ae17b81e7053 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -293,7 +293,7 @@ xfs_reflink_reserve_cow( retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, - end_fsb - imap->br_startoff, &got, &prev, &idx, eof); + end_fsb - imap->br_startoff, &got, &idx, eof); switch (error) { case 0: break; -- GitLab From cf168f2ff8bae8a4eb966c3c0f055086c1fae87d Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 9 Jan 2017 16:38:43 +0100 Subject: [PATCH 0388/1442] xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() commit 974ae922efd93b07b6cdf989ae959883f6f05fd8 upstream. Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 23 +++++++++++++++++++++-- fs/xfs/libxfs/xfs_bmap.h | 2 +- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_reflink.c | 2 +- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 09b5d4f07636..aab523a7dcaf 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -50,6 +50,7 @@ #include "xfs_ag_resv.h" #include "xfs_refcount.h" #include "xfs_rmap_btree.h" +#include "xfs_icache.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -4247,8 +4248,9 @@ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, + xfs_fileoff_t off, xfs_filblks_t len, + xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof) @@ -4260,10 +4262,17 @@ xfs_bmapi_reserve_delalloc( char rt = XFS_IS_REALTIME_INODE(ip); xfs_extlen_t extsz; int error; + xfs_fileoff_t aoff = off; - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN); + /* + * Cap the alloc length. Keep track of prealloc so we know whether to + * tag the inode before we return. + */ + alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); if (!eof) alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); + if (prealloc && alen >= len) + prealloc = alen - len; /* Figure out the extent size, adjust alen */ if (whichfork == XFS_COW_FORK) @@ -4329,6 +4338,16 @@ xfs_bmapi_reserve_delalloc( */ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + /* + * Tag the inode if blocks were preallocated. Note that COW fork + * preallocation can occur at the start or end of the extent, even when + * prealloc == 0, so we must also check the aligned offset and length. + */ + if (whichfork == XFS_DATA_FORK && prealloc) + xfs_inode_set_eofblocks_tag(ip); + if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) + xfs_inode_set_cowblocks_tag(ip); + ASSERT(got->br_startoff <= aoff); ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); ASSERT(isnullstartblock(got->br_startblock)); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index e3c2b5adf505..d6d175a4fdec 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -242,7 +242,7 @@ struct xfs_bmbt_rec_host * int fork, int *eofp, xfs_extnum_t *lastxp, struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, - xfs_fileoff_t aoff, xfs_filblks_t len, + xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, xfs_extnum_t *lastx, int eof); enum xfs_bmap_intent_type { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 59ffcac8a47d..cc25892cfc63 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -622,7 +622,7 @@ xfs_file_iomap_begin_delay( retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, - end_fsb - offset_fsb, &got, &idx, eof); + end_fsb - offset_fsb, 0, &got, &idx, eof); switch (error) { case 0: break; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index ae17b81e7053..e65cd0a3c055 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -293,7 +293,7 @@ xfs_reflink_reserve_cow( retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, - end_fsb - imap->br_startoff, &got, &idx, eof); + end_fsb - imap->br_startoff, 0, &got, &idx, eof); switch (error) { case 0: break; -- GitLab From 4a323331d8c963aaba4f19f84f16c052bd7eb156 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 16:38:44 +0100 Subject: [PATCH 0389/1442] xfs: use new extent lookup helpers in __xfs_reflink_reserve_cow commit 2755fc4438501c8c28e7783df890e889f6772bee upstream. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_reflink.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e65cd0a3c055..7a30bff82dcf 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -243,10 +243,11 @@ xfs_reflink_reserve_cow( struct xfs_bmbt_irec *imap, bool *shared) { - struct xfs_bmbt_irec got, prev; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got; xfs_fileoff_t end_fsb, orig_end_fsb; - int eof = 0, error = 0; - bool trimmed; + int error = 0; + bool eof = false, trimmed; xfs_extnum_t idx; xfs_extlen_t align; @@ -258,8 +259,9 @@ xfs_reflink_reserve_cow( * extent list is generally faster than going out to the shared extent * tree. */ - xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx, - &got, &prev); + + if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got)) + eof = true; if (!eof && got.br_startoff <= imap->br_startoff) { trace_xfs_reflink_cow_found(ip, imap); xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); -- GitLab From 3d6e3b12bb4eac5cadb4733530c1967fe9e3d6a9 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 9 Jan 2017 16:38:45 +0100 Subject: [PATCH 0390/1442] xfs: clean up cow fork reservation and tag inodes correctly commit 0260d8ff5f76617e3a55a1c471383ecb4404c3ad upstream. COW fork reservation is implemented via delayed allocation. The code is modeled after the traditional delalloc allocation code, but is slightly different in terms of how preallocation occurs. Rather than post-eof speculative preallocation, COW fork preallocation is implemented via a COW extent size hint that is designed to minimize fragmentation as a reflinked file is split over time. xfs_reflink_reserve_cow() still uses logic that is oriented towards dealing with post-eof speculative preallocation, however, and is stale or not necessarily correct. First, the EOF alignment to the COW extent size hint is implemented in xfs_bmapi_reserve_delalloc() (which does so correctly by aligning the start and end offsets) and so is not necessary in xfs_reflink_reserve_cow(). The backoff and retry logic on ENOSPC is also ineffective for the same reason, as xfs_bmapi_reserve_delalloc() will simply perform the same allocation request on the retry. Finally, since the COW extent size hint aligns the start and end offset of the range to allocate, the end_fsb != orig_end_fsb logic is not sufficient. Indeed, if a write request happens to end on an aligned offset, it is possible that we do not tag the inode for COW preallocation even though xfs_bmapi_reserve_delalloc() may have preallocated at the start offset. Kill the unnecessary, duplicate code in xfs_reflink_reserve_cow(). Remove the inode tag logic as well since xfs_bmapi_reserve_delalloc() has been updated to tag the inode correctly. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_reflink.c | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 7a30bff82dcf..4d3f74e3c5e1 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -245,11 +245,9 @@ xfs_reflink_reserve_cow( { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got; - xfs_fileoff_t end_fsb, orig_end_fsb; int error = 0; bool eof = false, trimmed; xfs_extnum_t idx; - xfs_extlen_t align; /* * Search the COW fork extent list first. This serves two purposes: @@ -287,33 +285,12 @@ xfs_reflink_reserve_cow( if (error) return error; - end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount; - - align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); - if (align) - end_fsb = roundup_64(end_fsb, align); - -retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, - end_fsb - imap->br_startoff, 0, &got, &idx, eof); - switch (error) { - case 0: - break; - case -ENOSPC: - case -EDQUOT: - /* retry without any preallocation */ + imap->br_blockcount, 0, &got, &idx, eof); + if (error == -ENOSPC || error == -EDQUOT) trace_xfs_reflink_cow_enospc(ip, imap); - if (end_fsb != orig_end_fsb) { - end_fsb = orig_end_fsb; - goto retry; - } - /*FALLTHRU*/ - default: + if (error) return error; - } - - if (end_fsb != orig_end_fsb) - xfs_inode_set_cowblocks_tag(ip); trace_xfs_reflink_cow_alloc(ip, &got); return 0; -- GitLab From 553937d3cce8d3806655771099928ab5525fa7e4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 16:38:46 +0100 Subject: [PATCH 0391/1442] xfs: use new extent lookup helpers xfs_file_iomap_begin_delay commit 656152e552e5cbe0c11ad261b524376217c2fb13 upstream. And only lookup the previous extent inside xfs_iomap_prealloc_size if we actually need it. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iomap.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index cc25892cfc63..e4bfde212cc2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -395,11 +395,12 @@ xfs_iomap_prealloc_size( struct xfs_inode *ip, loff_t offset, loff_t count, - xfs_extnum_t idx, - struct xfs_bmbt_irec *prev) + xfs_extnum_t idx) { struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + struct xfs_bmbt_irec prev; int shift = 0; int64_t freesp; xfs_fsblock_t qblocks; @@ -419,8 +420,8 @@ xfs_iomap_prealloc_size( */ if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || - idx == 0 || - prev->br_startoff + prev->br_blockcount < offset_fsb) + !xfs_iext_get_extent(ifp, idx - 1, &prev) || + prev.br_startoff + prev.br_blockcount < offset_fsb) return mp->m_writeio_blocks; /* @@ -439,8 +440,8 @@ xfs_iomap_prealloc_size( * always extends to MAXEXTLEN rather than falling short due to things * like stripe unit/width alignment of real extents. */ - if (prev->br_blockcount <= (MAXEXTLEN >> 1)) - alloc_blocks = prev->br_blockcount << 1; + if (prev.br_blockcount <= (MAXEXTLEN >> 1)) + alloc_blocks = prev.br_blockcount << 1; else alloc_blocks = XFS_B_TO_FSB(mp, offset); if (!alloc_blocks) @@ -538,7 +539,6 @@ xfs_file_iomap_begin_delay( xfs_fileoff_t end_fsb, orig_end_fsb; int error = 0, eof = 0; struct xfs_bmbt_irec got; - struct xfs_bmbt_irec prev; xfs_extnum_t idx; ASSERT(!XFS_IS_REALTIME_INODE(ip)); @@ -563,8 +563,7 @@ xfs_file_iomap_begin_delay( goto out_unlock; } - xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, - &got, &prev); + eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); if (!eof && got.br_startoff <= offset_fsb) { if (xfs_is_reflink_inode(ip)) { bool shared; @@ -601,8 +600,7 @@ xfs_file_iomap_begin_delay( if (eof) { xfs_fsblock_t prealloc_blocks; - prealloc_blocks = - xfs_iomap_prealloc_size(ip, offset, count, idx, &prev); + prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx); if (prealloc_blocks) { xfs_extlen_t align; xfs_off_t end_offset; -- GitLab From 06ac11df915d5d607780b86fa7d98f1ab57dab16 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 9 Jan 2017 16:38:47 +0100 Subject: [PATCH 0392/1442] xfs: pass post-eof speculative prealloc blocks to bmapi commit f782088c9e5d08e9494c63e68b4e85716df3e5f8 upstream. xfs_file_iomap_begin_delay() implements post-eof speculative preallocation by extending the block count of the requested delayed allocation. Now that xfs_bmapi_reserve_delalloc() has been updated to handle prealloc blocks separately and tag the inode, update xfs_file_iomap_begin_delay() to use the new parameter and rely on the former to tag the inode. Note that this patch does not change behavior. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iomap.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index e4bfde212cc2..15a83813b708 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -536,10 +536,11 @@ xfs_file_iomap_begin_delay( xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t maxbytes_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - xfs_fileoff_t end_fsb, orig_end_fsb; + xfs_fileoff_t end_fsb; int error = 0, eof = 0; struct xfs_bmbt_irec got; xfs_extnum_t idx; + xfs_fsblock_t prealloc_blocks = 0; ASSERT(!XFS_IS_REALTIME_INODE(ip)); ASSERT(!xfs_get_extsz_hint(ip)); @@ -594,33 +595,32 @@ xfs_file_iomap_begin_delay( * the lower level functions are updated. */ count = min_t(loff_t, count, 1024 * PAGE_SIZE); - end_fsb = orig_end_fsb = - min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); if (eof) { - xfs_fsblock_t prealloc_blocks; - prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, idx); if (prealloc_blocks) { xfs_extlen_t align; xfs_off_t end_offset; + xfs_fileoff_t p_end_fsb; end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1); - end_fsb = XFS_B_TO_FSBT(mp, end_offset) + - prealloc_blocks; + p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + + prealloc_blocks; align = xfs_eof_alignment(ip, 0); if (align) - end_fsb = roundup_64(end_fsb, align); + p_end_fsb = roundup_64(p_end_fsb, align); - end_fsb = min(end_fsb, maxbytes_fsb); - ASSERT(end_fsb > offset_fsb); + p_end_fsb = min(p_end_fsb, maxbytes_fsb); + ASSERT(p_end_fsb > offset_fsb); + prealloc_blocks = p_end_fsb - end_fsb; } } retry: error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, - end_fsb - offset_fsb, 0, &got, &idx, eof); + end_fsb - offset_fsb, prealloc_blocks, &got, &idx, eof); switch (error) { case 0: break; @@ -628,8 +628,8 @@ xfs_file_iomap_begin_delay( case -EDQUOT: /* retry without any preallocation */ trace_xfs_delalloc_enospc(ip, offset, count); - if (end_fsb != orig_end_fsb) { - end_fsb = orig_end_fsb; + if (prealloc_blocks) { + prealloc_blocks = 0; goto retry; } /*FALLTHRU*/ @@ -637,13 +637,6 @@ xfs_file_iomap_begin_delay( goto out_unlock; } - /* - * Tag the inode as speculatively preallocated so we can reclaim this - * space on demand, if necessary. - */ - if (end_fsb != orig_end_fsb) - xfs_inode_set_eofblocks_tag(ip); - trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) -- GitLab From bdbfd4ee6bc7aa37062f0222a143b3c3fee7282e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 9 Jan 2017 16:38:48 +0100 Subject: [PATCH 0393/1442] xfs: Move AGI buffer type setting to xfs_read_agi commit 200237d6746faaeaf7f4ff4abbf13f3917cee60a upstream. We've missed properly setting the buffer type for an AGI transaction in 3 spots now, so just move it into xfs_read_agi() and set it if we are in a transaction to avoid the problem in the future. This is similar to how it is done in i.e. the dir3 and attr3 read functions. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ialloc.c | 4 ++-- fs/xfs/xfs_inode.c | 2 -- fs/xfs/xfs_log_recover.c | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 51b4e0de1fdc..c482b9716347 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2450,8 +2450,6 @@ xfs_ialloc_log_agi( ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); - /* * Compute byte offsets for the first and last fields in the first * region and log the agi buffer. This only logs up through @@ -2592,6 +2590,8 @@ xfs_read_agi( XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); if (error) return error; + if (tp) + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF); xfs_buf_set_ref(*bpp, XFS_AGI_REF); return 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4e560e6a12c1..512ff13ed66a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2041,7 +2041,6 @@ xfs_iunlink( agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); - xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); return 0; @@ -2133,7 +2132,6 @@ xfs_iunlink_remove( agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket_index); - xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); } else { diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 2d91f5ab7538..9b3d7c76915d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4929,7 +4929,6 @@ xlog_recover_clear_agi_bucket( agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket); - xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); -- GitLab From a585e1c4ec939d1cc5ce8776c606733d046072da Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 9 Jan 2017 16:38:49 +0100 Subject: [PATCH 0394/1442] xfs: pass state not whichfork to trace_xfs_extlist commit 7710517fc37b1899722707883b54694ea710b3c0 upstream. When xfs_bmap_trace_exlist called trace_xfs_extlist, it sent in the "whichfork" var instead of the bmap "state" as expected (even though state was already set up for this purpose). As a result, the xfs_bmap_class in tracing code used "whichfork" not state in xfs_iext_state_to_fork(), and got the wrong ifork pointer. It all goes downhill from there, including an ASSERT when ifp_bytes is empty by the time it reaches xfs_iext_get_ext(): XFS: Assertion failed: idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index aab523a7dcaf..cfd1fdd327eb 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -531,7 +531,7 @@ xfs_bmap_trace_exlist( ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(cnt == xfs_iext_count(ifp)); for (idx = 0; idx < cnt; idx++) - trace_xfs_extlist(ip, idx, whichfork, caller_ip); + trace_xfs_extlist(ip, idx, state, caller_ip); } /* -- GitLab From 4081d4a79a95252486569b014b9a666ad5bfdbee Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 9 Jan 2017 16:38:50 +0100 Subject: [PATCH 0395/1442] xfs: handle cow fork in xfs_bmap_trace_exlist commit c44a1f22626c153976289e1cd67bdcdfefc16e1f upstream. By inspection, xfs_bmap_trace_exlist isn't handling cow forks, and will trace the data fork instead. Fix this by setting state appropriately if whichfork == XFS_COW_FORK. ()___() < @ @ > | | {o_o} (|) Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index cfd1fdd327eb..d4481d4383ee 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -518,7 +518,7 @@ void xfs_bmap_trace_exlist( xfs_inode_t *ip, /* incore inode pointer */ xfs_extnum_t cnt, /* count of entries in the list */ - int whichfork, /* data or attr fork */ + int whichfork, /* data or attr or cow fork */ unsigned long caller_ip) { xfs_extnum_t idx; /* extent record index */ @@ -527,6 +527,8 @@ xfs_bmap_trace_exlist( if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; + else if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(cnt == xfs_iext_count(ifp)); -- GitLab From b85f32481d93a23488d208fca4558e84515dc0f4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:51 +0100 Subject: [PATCH 0396/1442] xfs: forbid AG btrees with level == 0 commit d2a047f31e86941fa896e0e3271536d50aba415e upstream. There is no such thing as a zero-level AG btree since even a single-node zero-records btree has one level. Btree cursor constructors read cur_nlevels straight from disk and then access things like cur_bufs[cur_nlevels - 1] which is /really/ bad if cur_nlevels is zero! Therefore, strengthen the verifiers to prevent this possibility. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 10 +++++++--- fs/xfs/libxfs/xfs_ialloc.c | 9 ++++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index effb64cf714f..5050056a0b06 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2455,12 +2455,15 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) return false; - if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) return false; if (xfs_sb_version_hasrmapbt(&mp->m_sb) && - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS) + (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) return false; /* @@ -2477,7 +2480,8 @@ xfs_agf_verify( return false; if (xfs_sb_version_hasreflink(&mp->m_sb) && - be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS) + (be32_to_cpu(agf->agf_refcount_level) < 1 || + be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) return false; return true;; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index c482b9716347..d45c03779dae 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2510,8 +2510,15 @@ xfs_agi_verify( if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) return false; - if (be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) + if (be32_to_cpu(agi->agi_level) < 1 || + be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) return false; + + if (xfs_sb_version_hasfinobt(&mp->m_sb) && + (be32_to_cpu(agi->agi_free_level) < 1 || + be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) + return false; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't -- GitLab From 4bb31bccea381ebadf0e1bc4433026613c15ce68 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:52 +0100 Subject: [PATCH 0397/1442] xfs: check for bogus values in btree block headers commit bb3be7e7c1c18e1b141d4cadeb98cc89ecf78099 upstream. When we're reading a btree block, make sure that what we retrieved matches the owner and level; and has a plausible number of records. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 0e80993c8a59..21e6a6ab6b9a 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1769,8 +1769,28 @@ xfs_btree_lookup_get_block( if (error) return error; + /* Check the inode owner since the verifiers don't. */ + if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && + (cur->bc_flags & XFS_BTREE_LONG_PTRS) && + be64_to_cpu((*blkp)->bb_u.l.bb_owner) != + cur->bc_private.b.ip->i_ino) + goto out_bad; + + /* Did we get the level we were looking for? */ + if (be16_to_cpu((*blkp)->bb_level) != level) + goto out_bad; + + /* Check that internal nodes have at least one record. */ + if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) + goto out_bad; + xfs_btree_setbuf(cur, level, bp); return 0; + +out_bad: + *blkp = NULL; + xfs_trans_brelse(cur->bc_tp, bp); + return -EFSCORRUPTED; } /* -- GitLab From b88398de18331ed448739866bf9fe3743e7c1bbf Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:53 +0100 Subject: [PATCH 0398/1442] xfs: complain if we don't get nextents bmap records commit 356a3225222e5bc4df88aef3419fb6424f18ab69 upstream. When reading into memory all extents of a btree-format inode fork, complain if the number of extents we find is not the same as the number of extents reported in the inode core. This is needed to stop an IO action from accessing the garbage areas of the in-core fork. [dchinner: removed redundant assert] Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d4481d4383ee..a2a24fac28d0 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1377,8 +1377,9 @@ xfs_bmap_read_extents( return error; block = XFS_BUF_TO_BLOCK(bp); } + if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) + return -EFSCORRUPTED; ASSERT(i == xfs_iext_count(ifp)); - ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); XFS_BMAP_TRACE_EXLIST(ip, i, whichfork); return 0; error0: -- GitLab From cd4bf1d416ef199223fa41eb8225f2a137c0be99 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:54 +0100 Subject: [PATCH 0399/1442] xfs: don't crash if reading a directory results in an unexpected hole commit 96a3aefb8ffde23180130460b0b2407b328eb727 upstream. In xfs_dir3_data_read, we can encounter the situation where err == 0 and *bpp == NULL if the given bno offset happens to be a hole; this leads to a crash if we try to set the buffer type after the _da_read_buf call. Holes can happen due to corrupt or malicious entries in the bmbt data, so be a little more careful when we're handling buffers. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_dir2_data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 725fc7841fde..e526f5a5f0be 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -329,7 +329,7 @@ xfs_dir3_data_read( err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, XFS_DATA_FORK, &xfs_dir3_data_buf_ops); - if (!err && tp) + if (!err && tp && *bpp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); return err; } -- GitLab From 12815dd15c480c0d595401e4eec54542025607f6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:55 +0100 Subject: [PATCH 0400/1442] xfs: error out if trying to add attrs and anextents > 0 commit 0f352f8ee8412bd9d34fb2a6411241da61175c0e upstream. We shouldn't assert if somehow we end up trying to add an attr fork to an inode that apparently already has attr extents because this is an indication of on-disk corruption. Instead, return an error code to userspace. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a2a24fac28d0..89d727b659fc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1153,6 +1153,10 @@ xfs_bmap_add_attrfork( goto trans_cancel; if (XFS_IFORK_Q(ip)) goto trans_cancel; + if (ip->i_d.di_anextents != 0) { + error = -EFSCORRUPTED; + goto trans_cancel; + } if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { /* * For inodes coming from pre-6.2 filesystems. @@ -1160,7 +1164,6 @@ xfs_bmap_add_attrfork( ASSERT(ip->i_d.di_aformat == 0); ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; } - ASSERT(ip->i_d.di_anextents == 0); xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -- GitLab From f8b20705a383357ddca04ea648a7ac8fccd438ef Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:56 +0100 Subject: [PATCH 0401/1442] xfs: don't allow di_size with high bit set commit ef388e2054feedaeb05399ed654bdb06f385d294 upstream. The on-disk field di_size is used to set i_size, which is a signed integer of loff_t. If the high bit of di_size is set, we'll end up with a negative i_size, which will cause all sorts of problems. Since the VFS won't let us create a file with such length, we should catch them here in the verifier too. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_inode_buf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 134424fac434..c906e50515f0 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -392,6 +392,14 @@ xfs_dinode_verify( if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return false; + /* don't allow invalid i_size */ + if (be64_to_cpu(dip->di_size) & (1ULL << 63)) + return false; + + /* No zero-length symlinks. */ + if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0) + return false; + /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) return true; -- GitLab From c00203386d50c1d2f2621163b9d7a533de4819fc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:57 +0100 Subject: [PATCH 0402/1442] xfs: don't cap maximum dedupe request length commit 1bb33a98702d8360947f18a44349df75ba555d5d upstream. After various discussions on linux-fsdevel, it has been decided that it is not necessary to cap the length of a dedupe request, and that correctly-written userspace client programs will be able to absorb the change. Therefore, remove the length clamping behavior. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_file.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 6e4f7f900fea..9a5d64b5f35a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -939,7 +939,6 @@ xfs_file_clone_range( len, false); } -#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) STATIC ssize_t xfs_file_dedupe_range( struct file *src_file, @@ -950,14 +949,6 @@ xfs_file_dedupe_range( { int error; - /* - * Limit the total length we will dedupe for each operation. - * This is intended to bound the total time spent in this - * ioctl to something sane. - */ - if (len > XFS_MAX_DEDUPE_LEN) - len = XFS_MAX_DEDUPE_LEN; - error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, len, true); if (error) -- GitLab From 3c382dda47e4b51de6d67847ddfc9c10962c4ca3 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 9 Jan 2017 16:38:58 +0100 Subject: [PATCH 0403/1442] xfs: ignore leaf attr ichdr.count in verifier during log replay commit 2e1d23370e75d7d89350d41b4ab58c7f6a0e26b2 upstream. When we create a new attribute, we first create a shortform attribute, and try to fit the new attribute into it. If that fails, we copy the (empty) attribute into a leaf attribute, and do the copy again. Thus there can be a transient state where we have an empty leaf attribute. If we encounter this during log replay, the verifier will fail. So add a test to ignore this part of the leaf attr verification during log replay. Thanks as usual to dchinner for spotting the problem. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr_leaf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 8ea91f363093..2852521fc8ec 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -253,6 +253,7 @@ xfs_attr3_leaf_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_perag *pag = bp->b_pag; struct xfs_attr3_icleaf_hdr ichdr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -273,7 +274,12 @@ xfs_attr3_leaf_verify( if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) return false; } - if (ichdr.count == 0) + /* + * In recovery there is a transient state where count == 0 is valid + * because we may have transitioned an empty shortform attr to a leaf + * if the attr didn't fit in shortform. + */ + if (pag && pag->pagf_init && ichdr.count == 0) return false; /* XXX: need to range check rest of attr header values */ -- GitLab From aa38f370b25acc478be36e237e882e78f71c67e5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:38:59 +0100 Subject: [PATCH 0404/1442] xfs: use GPF_NOFS when allocating btree cursors commit b24a978c377be5f14e798cb41238e66fe51aab2f upstream. Use NOFS for allocating btree cursors, since they can be called under the ilock. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc_btree.c | 2 +- fs/xfs/libxfs/xfs_bmap_btree.c | 2 +- fs/xfs/libxfs/xfs_ialloc_btree.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 5ba2dac5e67c..c06ec77a9418 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -421,7 +421,7 @@ xfs_allocbt_init_cursor( ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 8007d2ba9aef..049fa597ae91 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -796,7 +796,7 @@ xfs_bmbt_init_cursor( struct xfs_btree_cur *cur; ASSERT(whichfork != XFS_COW_FORK); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index eab68ae2e011..6c6b95947e71 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -357,7 +357,7 @@ xfs_inobt_init_cursor( struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); struct xfs_btree_cur *cur; - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); cur->bc_tp = tp; cur->bc_mp = mp; -- GitLab From d9c7c9fa600acb0d587678cc71565316d11ec7ad Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:39:00 +0100 Subject: [PATCH 0405/1442] xfs: fix double-cleanup when CUI recovery fails commit 7a21272b088894070391a94fdd1c67014020fa1d upstream. Dan Carpenter reported a double-free of rcur if _defer_finish fails while we're recovering CUI items. Fix the error recovery to prevent this. Reported-by: Dan Carpenter Signed-off-by: Darrick J. Wong Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_refcount_item.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index fe86a668a57e..6e4c7446c3d4 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -526,13 +526,14 @@ xfs_cui_recover( xfs_refcount_finish_one_cleanup(tp, rcur, error); error = xfs_defer_finish(&tp, &dfops, NULL); if (error) - goto abort_error; + goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); +abort_defer: xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; -- GitLab From b96e4e87d2b0fee921ceafa4f5ae1c90e9121db4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 9 Jan 2017 16:39:01 +0100 Subject: [PATCH 0406/1442] xfs: use the actual AG length when reserving blocks commit 20e73b000bcded44a91b79429d8fa743247602ad upstream. We need to use the actual AG length when making per-AG reservations, since we could otherwise end up reserving more blocks out of the last AG than there are actual blocks. Complained-about-by: Brian Foster Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag_resv.c | 3 +++ fs/xfs/libxfs/xfs_refcount_btree.c | 9 ++++++--- fs/xfs/libxfs/xfs_refcount_btree.h | 3 ++- fs/xfs/libxfs/xfs_rmap_btree.c | 14 +++++++------- fs/xfs/libxfs/xfs_rmap_btree.h | 3 ++- fs/xfs/xfs_fsops.c | 14 ++++++++++++++ 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e5ebc3770460..d346d42c54d1 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -256,6 +256,9 @@ xfs_ag_resv_init( goto out; } + ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= + pag->pagf_freeblks + pag->pagf_flcount); out: return error; } diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 453bb2757ec2..2ba216966002 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -408,13 +408,14 @@ xfs_refcountbt_calc_size( */ xfs_extlen_t xfs_refcountbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. */ if (mp->m_refc_mxr[0] == 0) return 0; - return xfs_refcountbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_refcountbt_calc_size(mp, agblocks); } /* @@ -429,22 +430,24 @@ xfs_refcountbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - *ask += xfs_refcountbt_max_size(mp); error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_refcount_blocks); xfs_buf_relse(agbp); + *ask += xfs_refcountbt_max_size(mp, agblocks); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 3be7768bd51a..9db008b955b7 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -66,7 +66,8 @@ extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 83e672ff7577..33a28efc3085 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -549,13 +549,14 @@ xfs_rmapbt_calc_size( */ xfs_extlen_t xfs_rmapbt_max_size( - struct xfs_mount *mp) + struct xfs_mount *mp, + xfs_agblock_t agblocks) { /* Bail out if we're uninitialized, which can happen in mkfs. */ if (mp->m_rmap_mxr[0] == 0) return 0; - return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks); + return xfs_rmapbt_calc_size(mp, agblocks); } /* @@ -570,25 +571,24 @@ xfs_rmapbt_calc_reserves( { struct xfs_buf *agbp; struct xfs_agf *agf; - xfs_extlen_t pool_len; + xfs_agblock_t agblocks; xfs_extlen_t tree_len; int error; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; - /* Reserve 1% of the AG or enough for 1 block per record. */ - pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp)); - *ask += pool_len; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); if (error) return error; agf = XFS_BUF_TO_AGF(agbp); + agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_rmap_blocks); xfs_buf_relse(agbp); + /* Reserve 1% of the AG or enough for 1 block per record. */ + *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); *used += tree_len; return error; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 2a9ac472fb15..19c08e933049 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -60,7 +60,8 @@ extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, unsigned long long len); -extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp); +extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 93d12fa2670d..242e8091296d 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -631,6 +631,20 @@ xfs_growfs_data_private( xfs_set_low_space_thresholds(mp); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); + /* + * If we expanded the last AG, free the per-AG reservation + * so we can reinitialize it with the new size. + */ + if (new) { + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, agno); + error = xfs_ag_resv_free(pag); + xfs_perag_put(pag); + if (error) + goto out; + } + /* Reserve AG metadata blocks. */ error = xfs_fs_reserve_ag_blocks(mp); if (error && error != -ENOSPC) -- GitLab From 91192ae41e6f638d580bd0e3e6fd2d311eaf5adf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 16:39:02 +0100 Subject: [PATCH 0407/1442] xfs: fix crash and data corruption due to removal of busy COW extents commit a1b7a4dea6166cf46be895bce4aac67ea5160fe8 upstream. There is a race window between write_cache_pages calling clear_page_dirty_for_io and XFS calling set_page_writeback, in which the mapping for an inode is tagged neither as dirty, nor as writeback. If the COW shrinker hits in exactly that window we'll remove the delayed COW extents and writepages trying to write it back, which in release kernels will manifest as corruption of the bmap btree, and in debug kernels will trip the ASSERT about now calling xfs_bmapi_write with the COWFORK flag for holes. A complex customer load manages to hit this window fairly reliably, probably by always having COW writeback in flight while the cow shrinker runs. This patch adds another check for having the I_DIRTY_PAGES flag set, which is still set during this race window. While this fixes the problem I'm still not overly happy about the way the COW shrinker works as it still seems a bit fragile. Signed-off-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_icache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 9c3e5c6ddf20..29cc9886a3cb 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1599,7 +1599,8 @@ xfs_inode_free_cowblocks( * If the mapping is dirty or under writeback we cannot touch the * CoW fork. Leave it alone if we're in the midst of a directio. */ - if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || + if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || + mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) return 0; -- GitLab From 1b9c2556809ad30ef8e28398d77b6b007e7be6c0 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Mon, 9 Jan 2017 16:39:03 +0100 Subject: [PATCH 0408/1442] xfs: fix max_retries _show and _store functions commit ff97f2399edac1e0fb3fa7851d5fbcbdf04717cf upstream. max_retries _show and _store functions should test against cfg->max_retries, not cfg->retry_timeout Signed-off-by: Carlos Maiolino Reviewed-by: Eric Sandeen Signed-off-by: Darrick J. Wong Cc: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 276d3023d60f..de6195e38910 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -396,7 +396,7 @@ max_retries_show( int retries; struct xfs_error_cfg *cfg = to_error_cfg(kobject); - if (cfg->retry_timeout == XFS_ERR_RETRY_FOREVER) + if (cfg->max_retries == XFS_ERR_RETRY_FOREVER) retries = -1; else retries = cfg->max_retries; @@ -422,7 +422,7 @@ max_retries_store( return -EINVAL; if (val == -1) - cfg->retry_timeout = XFS_ERR_RETRY_FOREVER; + cfg->max_retries = XFS_ERR_RETRY_FOREVER; else cfg->max_retries = val; return count; -- GitLab From 34db201f0de7138d3646946f5d33e178c93efca6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Dec 2016 12:01:05 +0100 Subject: [PATCH 0409/1442] clocksource/dummy_timer: Move hotplug callback after the real timers commit 9bf11ecce5a2758e5a097c2f3a13d08552d0d6f9 upstream. When the dummy timer callback is invoked before the real timer callbacks, then it tries to install that timer for the starting CPU. If the platform does not have a broadcast timer installed the installation fails with a kernel crash. The crash happens due to a unconditional deference of the non available broadcast device. This needs to be fixed in the timer core code. But even when this is fixed in the core code then installing the dummy timer before the real timers is a pointless exercise. Move it to the end of the callback list. Fixes: 00c1d17aab51 ("clocksource/dummy_timer: Convert to hotplug state machine") Reported-and-tested-by: Mason Signed-off-by: Thomas Gleixner Cc: Mark Rutland Cc: Anna-Maria Gleixner Cc: Richard Cochran Cc: Sebastian Andrzej Siewior Cc: Daniel Lezcano Cc: Peter Zijlstra , Cc: Sebastian Frias Cc: Thibaud Cornic Cc: Robin Murphy Link: http://lkml.kernel.org/r/1147ef90-7877-e4d2-bb2b-5c4fa8d3144b@free.fr Signed-off-by: Greg Kroah-Hartman --- include/linux/cpuhotplug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index afe641c02dca..ba1cad7b97cf 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -80,7 +80,6 @@ enum cpuhp_state { CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, - CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_JCORE_TIMER_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, @@ -94,6 +93,8 @@ enum cpuhp_state { CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, CPUHP_AP_KVM_ARM_VGIC_STARTING, CPUHP_AP_KVM_ARM_TIMER_STARTING, + /* Must be the last timer callback */ + CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM_CORESIGHT4_STARTING, -- GitLab From cf365b117388a50902d56988e69dd6f70b7662fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Dec 2016 12:10:37 +0100 Subject: [PATCH 0410/1442] tick/broadcast: Prevent NULL pointer dereference commit c1a9eeb938b5433947e5ea22f89baff3182e7075 upstream. When a disfunctional timer, e.g. dummy timer, is installed, the tick core tries to setup the broadcast timer. If no broadcast device is installed, the kernel crashes with a NULL pointer dereference in tick_broadcast_setup_oneshot() because the function has no sanity check. Reported-by: Mason Signed-off-by: Thomas Gleixner Cc: Mark Rutland Cc: Anna-Maria Gleixner Cc: Richard Cochran Cc: Sebastian Andrzej Siewior Cc: Daniel Lezcano Cc: Peter Zijlstra , Cc: Sebastian Frias Cc: Thibaud Cornic Cc: Robin Murphy Link: http://lkml.kernel.org/r/1147ef90-7877-e4d2-bb2b-5c4fa8d3144b@free.fr Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-broadcast.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f6aae7977824..d2a20e83ebae 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -871,6 +871,9 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { int cpu = smp_processor_id(); + if (!bc) + return; + /* Set it up only once ! */ if (bc->event_handler != tick_handle_oneshot_broadcast) { int was_periodic = clockevent_state_periodic(bc); -- GitLab From ec3d5c521af86e53690c8850def2d2f6a35e7756 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Jan 2017 08:29:09 +0100 Subject: [PATCH 0411/1442] Revert "rtlwifi: Fix enter/exit power_save" This reverts commit 98068574928f499b30f136ff57ef9a03cc575a36, which is commit ba9f93f82abafe2552eac942ebb11c2df4f8dd7f upstream as it causes problems. Reported-by: Dmitry Osipenko Cc: Ping-Ke Shih Cc: Larry Finger Cc: Kalle Valo Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- drivers/net/wireless/realtek/rtlwifi/base.c | 8 ++--- drivers/net/wireless/realtek/rtlwifi/core.c | 9 ++++-- drivers/net/wireless/realtek/rtlwifi/pci.c | 14 +++++--- drivers/net/wireless/realtek/rtlwifi/ps.c | 36 +++++---------------- 4 files changed, 27 insertions(+), 40 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 4ac928bf1f8e..264466f59c57 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1303,13 +1303,12 @@ EXPORT_SYMBOL_GPL(rtl_action_proc); static void setup_arp_tx(struct rtl_priv *rtlpriv, struct rtl_ps_ctl *ppsc) { - struct ieee80211_hw *hw = rtlpriv->hw; - rtlpriv->ra.is_special_data = true; if (rtlpriv->cfg->ops->get_btc_status()) rtlpriv->btcoexist.btc_ops->btc_special_packet_notify( rtlpriv, 1); - rtl_lps_leave(hw); + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); ppsc->last_delaylps_stamp_jiffies = jiffies; } @@ -1382,7 +1381,8 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx, if (is_tx) { rtlpriv->ra.is_special_data = true; - rtl_lps_leave(hw); + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); ppsc->last_delaylps_stamp_jiffies = jiffies; } diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 4da4e458142c..8e7f23c11680 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1150,8 +1150,10 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, } else { mstatus = RT_MEDIA_DISCONNECT; - if (mac->link_state == MAC80211_LINKED) - rtl_lps_leave(hw); + if (mac->link_state == MAC80211_LINKED) { + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); + } if (ppsc->p2p_ps_info.p2p_ps_mode > P2P_PS_NONE) rtl_p2p_ps_cmd(hw, P2P_PS_DISABLE); mac->link_state = MAC80211_NOLINK; @@ -1429,7 +1431,8 @@ static void rtl_op_sw_scan_start(struct ieee80211_hw *hw, } if (mac->link_state == MAC80211_LINKED) { - rtl_lps_leave(hw); + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); mac->link_state = MAC80211_LINKED_SCANNING; } else { rtl_ips_nic_on(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 5be4fc96002d..0dfa9eac3926 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -663,9 +663,11 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) } if (((rtlpriv->link_info.num_rx_inperiod + - rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) - rtl_lps_leave(hw); + rtlpriv->link_info.num_tx_inperiod) > 8) || + (rtlpriv->link_info.num_rx_inperiod > 2)) { + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); + } } static int _rtl_pci_init_one_rxdesc(struct ieee80211_hw *hw, @@ -916,8 +918,10 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) } if (((rtlpriv->link_info.num_rx_inperiod + rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) - rtl_lps_leave(hw); + (rtlpriv->link_info.num_rx_inperiod > 2)) { + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); + } skb = new_skb; no_new: if (rtlpriv->use_new_trx_flow) { diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index d0ffc4d508cf..18d979affc18 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -407,8 +407,8 @@ void rtl_lps_set_psmode(struct ieee80211_hw *hw, u8 rt_psmode) } } -/* Interrupt safe routine to enter the leisure power save mode.*/ -static void rtl_lps_enter_core(struct ieee80211_hw *hw) +/*Enter the leisure power save mode.*/ +void rtl_lps_enter(struct ieee80211_hw *hw) { struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -444,9 +444,10 @@ static void rtl_lps_enter_core(struct ieee80211_hw *hw) spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } +EXPORT_SYMBOL(rtl_lps_enter); -/* Interrupt safe routine to leave the leisure power save mode.*/ -static void rtl_lps_leave_core(struct ieee80211_hw *hw) +/*Leave the leisure power save mode.*/ +void rtl_lps_leave(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -476,6 +477,7 @@ static void rtl_lps_leave_core(struct ieee80211_hw *hw) } spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } +EXPORT_SYMBOL(rtl_lps_leave); /* For sw LPS*/ void rtl_swlps_beacon(struct ieee80211_hw *hw, void *data, unsigned int len) @@ -668,34 +670,12 @@ void rtl_lps_change_work_callback(struct work_struct *work) struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->enter_ps) - rtl_lps_enter_core(hw); + rtl_lps_enter(hw); else - rtl_lps_leave_core(hw); + rtl_lps_leave(hw); } EXPORT_SYMBOL_GPL(rtl_lps_change_work_callback); -void rtl_lps_enter(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - - if (!in_interrupt()) - return rtl_lps_enter_core(hw); - rtlpriv->enter_ps = true; - schedule_work(&rtlpriv->works.lps_change_work); -} -EXPORT_SYMBOL_GPL(rtl_lps_enter); - -void rtl_lps_leave(struct ieee80211_hw *hw) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - - if (!in_interrupt()) - return rtl_lps_leave_core(hw); - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); -} -EXPORT_SYMBOL_GPL(rtl_lps_leave); - void rtl_swlps_wq_callback(void *data) { struct rtl_works *rtlworks = container_of_dwork_rtl(data, -- GitLab From 7ff469ceba2652241778e75f263205fe7932b376 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Jan 2017 08:51:32 +0100 Subject: [PATCH 0412/1442] Revert "usb: gadget: composite: always set ep->mult to a sensible value" This reverts commit eab1c4e2d0ad4509ccb8476a604074547dc202e0 which is commit eaa496ffaaf19591fe471a36cef366146eeb9153 upstream as it was incorrectly backported. Reported-by: Bin Liu Cc: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 9 ++------- drivers/usb/gadget/function/uvc_video.c | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index e1841fa96412..c3ee5d93271f 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -201,12 +201,7 @@ int config_ep_by_speed(struct usb_gadget *g, _ep->desc = chosen_desc; _ep->comp_desc = NULL; _ep->maxburst = 0; - _ep->mult = 1; - - if (g->speed == USB_SPEED_HIGH && (usb_endpoint_xfer_isoc(_ep->desc) || - usb_endpoint_xfer_int(_ep->desc))) - _ep->mult = usb_endpoint_maxp(_ep->desc) & 0x7ff; - + _ep->mult = 0; if (!want_comp_desc) return 0; @@ -223,7 +218,7 @@ int config_ep_by_speed(struct usb_gadget *g, switch (usb_endpoint_type(_ep->desc)) { case USB_ENDPOINT_XFER_ISOC: /* mult: bits 1:0 of bmAttributes */ - _ep->mult = (comp_desc->bmAttributes & 0x3) + 1; + _ep->mult = comp_desc->bmAttributes & 0x3; case USB_ENDPOINT_XFER_BULK: case USB_ENDPOINT_XFER_INT: _ep->maxburst = comp_desc->bMaxBurst + 1; diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 0f01c04d7cbd..3d0d5d94a62f 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -243,7 +243,7 @@ uvc_video_alloc_requests(struct uvc_video *video) req_size = video->ep->maxpacket * max_t(unsigned int, video->ep->maxburst, 1) - * (video->ep->mult); + * (video->ep->mult + 1); for (i = 0; i < UVC_NUM_REQUESTS; ++i) { video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL); -- GitLab From 3999c535da7add2f5283303d77eb56434e660fd6 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 28 Sep 2016 12:33:31 +0300 Subject: [PATCH 0413/1442] usb: gadget: composite: always set ep->mult to a sensible value commit eaa496ffaaf19591fe471a36cef366146eeb9153 upstream. ep->mult is supposed to be set to Isochronous and Interrupt Endapoint's multiplier value. This value is computed from different places depending on the link speed. If we're dealing with HighSpeed, then it's part of bits [12:11] of wMaxPacketSize. This case wasn't taken into consideration before. While at that, also make sure the ep->mult defaults to one so drivers can use it unconditionally and assume they'll never multiply ep->maxpacket to zero. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 9 +++++++-- drivers/usb/gadget/function/uvc_video.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index c3ee5d93271f..c3c5b87b35b3 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -201,7 +201,12 @@ int config_ep_by_speed(struct usb_gadget *g, _ep->desc = chosen_desc; _ep->comp_desc = NULL; _ep->maxburst = 0; - _ep->mult = 0; + _ep->mult = 1; + + if (g->speed == USB_SPEED_HIGH && (usb_endpoint_xfer_isoc(_ep->desc) || + usb_endpoint_xfer_int(_ep->desc))) + _ep->mult = ((usb_endpoint_maxp(_ep->desc) & 0x1800) >> 11) + 1; + if (!want_comp_desc) return 0; @@ -218,7 +223,7 @@ int config_ep_by_speed(struct usb_gadget *g, switch (usb_endpoint_type(_ep->desc)) { case USB_ENDPOINT_XFER_ISOC: /* mult: bits 1:0 of bmAttributes */ - _ep->mult = comp_desc->bmAttributes & 0x3; + _ep->mult = (comp_desc->bmAttributes & 0x3) + 1; case USB_ENDPOINT_XFER_BULK: case USB_ENDPOINT_XFER_INT: _ep->maxburst = comp_desc->bMaxBurst + 1; diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 3d0d5d94a62f..0f01c04d7cbd 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -243,7 +243,7 @@ uvc_video_alloc_requests(struct uvc_video *video) req_size = video->ep->maxpacket * max_t(unsigned int, video->ep->maxburst, 1) - * (video->ep->mult + 1); + * (video->ep->mult); for (i = 0; i < UVC_NUM_REQUESTS; ++i) { video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL); -- GitLab From 584fd7872c1bc29d3b752bebb6a5b470018f83e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Jan 2017 11:41:42 +0100 Subject: [PATCH 0414/1442] Linux 4.9.3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c9ce897465c5..ae42a0aaab06 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From d4a0b2e40c46898a307e37a857f623c3d884b1fe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 14 Dec 2016 11:06:18 -0800 Subject: [PATCH 0415/1442] net: vrf: Fix NAT within a VRF [ Upstream commit a0f37efa82253994b99623dbf41eea8dd0ba169b ] Connection tracking with VRF is broken because the pass through the VRF device drops the connection tracking info. Removing the call to nf_reset allows DNAT and MASQUERADE to work across interfaces within a VRF. Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 820de6a9ddde..5b995c43bff1 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -850,8 +850,6 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook, { struct net *net = dev_net(dev); - nf_reset(skb); - if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0) skb = NULL; /* kfree_skb(skb) handled by nf code */ -- GitLab From 8b8fbe5c25abcdc93fcb91c896c40452ca9789a3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 14 Dec 2016 14:31:11 -0800 Subject: [PATCH 0416/1442] net: vrf: Drop conntrack data after pass through VRF device on Tx [ Upstream commit eb63ecc1706b3e094d0f57438b6c2067cfc299f2 ] Locally originated traffic in a VRF fails in the presence of a POSTROUTING rule. For example, $ iptables -t nat -A POSTROUTING -s 11.1.1.0/24 -j MASQUERADE $ ping -I red -c1 11.1.1.3 ping: Warning: source address might be selected on device other than red. PING 11.1.1.3 (11.1.1.3) from 11.1.1.2 red: 56(84) bytes of data. ping: sendmsg: Operation not permitted Worse, the above causes random corruption resulting in a panic in random places (I have not seen a consistent backtrace). Call nf_reset to drop the conntrack info following the pass through the VRF device. The nf_reset is needed on Tx but not Rx because of the order in which NF_HOOK's are hit: on Rx the VRF device is after the real ingress device and on Tx it is is before the real egress device. Connection tracking should be tied to the real egress device and not the VRF device. Fixes: 8f58336d3f78a ("net: Add ethernet header for pass through VRF device") Fixes: 35402e3136634 ("net: Add IPv6 support to VRF device") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 5b995c43bff1..3cb35881dfda 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -371,6 +371,8 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, struct in6_addr *nexthop; int ret; + nf_reset(skb); + skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -552,6 +554,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s u32 nexthop; int ret = -EINVAL; + nf_reset(skb); + /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { struct sk_buff *skb2; -- GitLab From ed3cc329c7bc82c0f29735d81b26f96287dbe08a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 15 Dec 2016 23:05:52 +0800 Subject: [PATCH 0417/1442] sctp: sctp_transport_lookup_process should rcu_read_unlock when transport is null [ Upstream commit 08abb79542c9e8c367d1d8e44fe1026868d3f0a7 ] Prior to this patch, sctp_transport_lookup_process didn't rcu_read_unlock when it failed to find a transport by sctp_addrs_lookup_transport. This patch is to fix it by moving up rcu_read_unlock right before checking transport and also to remove the out path. Fixes: 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f23ad913dc7a..ca12aa346c0d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4479,9 +4479,10 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr); - if (!transport || !sctp_transport_hold(transport)) + if (!transport || !sctp_transport_hold(transport)) { + rcu_read_unlock(); goto out; - + } rcu_read_unlock(); err = cb(transport, p); sctp_transport_put(transport); -- GitLab From d36a1cb1e3285ba7eb1bcff5b231b4786deefc5b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 22 Dec 2016 18:19:16 -0500 Subject: [PATCH 0418/1442] inet: fix IP(V6)_RECVORIGDSTADDR for udp sockets [ Upstream commit 39b2dd765e0711e1efd1d1df089473a8dd93ad48 ] Socket cmsg IP(V6)_RECVORIGDSTADDR checks that port range lies within the packet. For sockets that have transport headers pulled, transport offset can be negative. Use signed comparison to avoid overflow. Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Reported-by: Nisar Jagabar Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 2 +- net/ipv6/datagram.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b8a2d63d1fb8..e869773e3979 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -137,7 +137,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); __be16 *ports = (__be16 *)skb_transport_header(skb); - if (skb_transport_offset(skb) + 4 > skb->len) + if (skb_transport_offset(skb) + 4 > (int)skb->len) return; /* All current transport protocols have the port numbers in the diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ccf40550c475..8616d17cf08f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -700,7 +700,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, struct sockaddr_in6 sin6; __be16 *ports = (__be16 *) skb_transport_header(skb); - if (skb_transport_offset(skb) + 4 <= skb->len) { + if (skb_transport_offset(skb) + 4 <= (int)skb->len) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. -- GitLab From ee99e2bc5e8a61e5e0025b7e654e6179d31a75fa Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 Dec 2016 11:16:22 -0500 Subject: [PATCH 0419/1442] ipv6: handle -EFAULT from skb_copy_bits [ Upstream commit a98f91758995cb59611e61318dddd8a6956b52c3 ] By setting certain socket options on ipv6 raw sockets, we can confuse the length calculation in rawv6_push_pending_frames triggering a BUG_ON. RIP: 0010:[] [] rawv6_sendmsg+0xc30/0xc40 RSP: 0018:ffff881f6c4a7c18 EFLAGS: 00010282 RAX: 00000000fffffff2 RBX: ffff881f6c681680 RCX: 0000000000000002 RDX: ffff881f6c4a7cf8 RSI: 0000000000000030 RDI: ffff881fed0f6a00 RBP: ffff881f6c4a7da8 R08: 0000000000000000 R09: 0000000000000009 R10: ffff881fed0f6a00 R11: 0000000000000009 R12: 0000000000000030 R13: ffff881fed0f6a00 R14: ffff881fee39ba00 R15: ffff881fefa93a80 Call Trace: [] ? unmap_page_range+0x693/0x830 [] inet_sendmsg+0x67/0xa0 [] sock_sendmsg+0x38/0x50 [] SYSC_sendto+0xef/0x170 [] SyS_sendto+0xe/0x10 [] do_syscall_64+0x50/0xa0 [] entry_SYSCALL64_slow_path+0x25/0x25 Handle by jumping to the failure path if skb_copy_bits gets an EFAULT. Reproducer: #include #include #include #include #include #include #include #define LEN 504 int main(int argc, char* argv[]) { int fd; int zero = 0; char buf[LEN]; memset(buf, 0, LEN); fd = socket(AF_INET6, SOCK_RAW, 7); setsockopt(fd, SOL_IPV6, IPV6_CHECKSUM, &zero, 4); setsockopt(fd, SOL_IPV6, IPV6_DSTOPTS, &buf, LEN); sendto(fd, buf, 1, 0, (struct sockaddr *) buf, 110); } Signed-off-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/raw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 054a1d84fc5e..869ffc76befa 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -589,7 +589,11 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } offset += skb_transport_offset(skb); - BUG_ON(skb_copy_bits(skb, offset, &csum, 2)); + err = skb_copy_bits(skb, offset, &csum, 2); + if (err < 0) { + ip6_flush_pending_frames(sk); + goto out; + } /* in case cksum was not initialized */ if (unlikely(csum)) -- GitLab From 09babe4ce12ef1698db87c29ad9329529fab1c85 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 21 Dec 2016 18:04:11 +0100 Subject: [PATCH 0420/1442] net, sched: fix soft lockup in tc_classify [ Upstream commit 628185cfddf1dfb701c4efe2cfd72cf5b09f5702 ] Shahar reported a soft lockup in tc_classify(), where we run into an endless loop when walking the classifier chain due to tp->next == tp which is a state we should never run into. The issue only seems to trigger under load in the tc control path. What happens is that in tc_ctl_tfilter(), thread A allocates a new tp, initializes it, sets tp_created to 1, and calls into tp->ops->change() with it. In that classifier callback we had to unlock/lock the rtnl mutex and returned with -EAGAIN. One reason why we need to drop there is, for example, that we need to request an action module to be loaded. This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning after we loaded and found the requested action, we need to redo the whole request so we don't race against others. While we had to unlock rtnl in that time, thread B's request was processed next on that CPU. Thread B added a new tp instance successfully to the classifier chain. When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN and destroying its tp instance which never got linked, we goto replay and redo A's request. This time when walking the classifier chain in tc_ctl_tfilter() for checking for existing tp instances we had a priority match and found the tp instance that was created and linked by thread B. Now calling again into tp->ops->change() with that tp was successful and returned without error. tp_created was never cleared in the second round, thus kernel thinks that we need to link it into the classifier chain (once again). tp and *back point to the same object due to the match we had earlier on. Thus for thread B's already public tp, we reset tp->next to tp itself and link it into the chain, which eventually causes the mentioned endless loop in tc_classify() once a packet hits the data path. Fix is to clear tp_created at the beginning of each request, also when we replay it. On the paths that can cause -EAGAIN we already destroy the original tp instance we had and on replay we really need to start from scratch. It seems that this issue was first introduced in commit 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup"). Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup") Reported-by: Shahar Klein Signed-off-by: Daniel Borkmann Cc: Cong Wang Acked-by: Eric Dumazet Tested-by: Shahar Klein Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b05d4a2155b0..c1a4b5d30814 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) unsigned long cl; unsigned long fh; int err; - int tp_created = 0; + int tp_created; if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: + tp_created = 0; + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); if (err < 0) return err; -- GitLab From 99f40c6bf565eaf69c705d944f687712e108a3dd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 27 Dec 2016 18:23:06 -0800 Subject: [PATCH 0421/1442] net: stmmac: Fix race between stmmac_drv_probe and stmmac_open [ Upstream commit 5701659004d68085182d2fd4199c79172165fa65 ] There is currently a small window during which the network device registered by stmmac can be made visible, yet all resources, including and clock and MDIO bus have not had a chance to be set up, this can lead to the following error to occur: [ 473.919358] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): stmmac_dvr_probe: warning: cannot get CSR clock [ 473.919382] stmmaceth 0000:01:00.0: no reset control found [ 473.919412] stmmac - user ID: 0x10, Synopsys ID: 0x42 [ 473.919429] stmmaceth 0000:01:00.0: DMA HW capability register supported [ 473.919436] stmmaceth 0000:01:00.0: RX Checksum Offload Engine supported [ 473.919443] stmmaceth 0000:01:00.0: TX Checksum insertion supported [ 473.919451] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized): Enable RX Mitigation via HW Watchdog Timer [ 473.921395] libphy: PHY stmmac-1:00 not found [ 473.921417] stmmaceth 0000:01:00.0 eth0: Could not attach to PHY [ 473.921427] stmmaceth 0000:01:00.0 eth0: stmmac_open: Cannot attach to PHY (error: -19) [ 473.959710] libphy: stmmac: probed [ 473.959724] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 0 IRQ POLL (stmmac-1:00) active [ 473.959728] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 1 IRQ POLL (stmmac-1:01) [ 473.959731] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 2 IRQ POLL (stmmac-1:02) [ 473.959734] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 3 IRQ POLL (stmmac-1:03) Fix this by making sure that register_netdev() is the last thing being done, which guarantees that the clock and the MDIO bus are available. Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove") Reported-by: Kweh, Hock Leong Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index caf069a465f2..b2893fbe25e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3349,12 +3349,6 @@ int stmmac_dvr_probe(struct device *device, spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); - ret = register_netdev(ndev); - if (ret) { - pr_err("%s: ERROR %i registering the device\n", __func__, ret); - goto error_netdev_register; - } - /* If a specific clk_csr value is passed from the platform * this means that the CSR Clock Range selection cannot be * changed at run-time and it is fixed. Viceversa the driver'll try to @@ -3376,15 +3370,24 @@ int stmmac_dvr_probe(struct device *device, if (ret < 0) { pr_debug("%s: MDIO bus (id: %d) registration failed", __func__, priv->plat->bus_id); - goto error_mdio_register; + goto error_napi_register; } } - return 0; + ret = register_netdev(ndev); + if (ret) { + pr_err("%s: ERROR %i registering the device\n", __func__, ret); + goto error_netdev_register; + } + + return ret; -error_mdio_register: - unregister_netdev(ndev); error_netdev_register: + if (priv->hw->pcs != STMMAC_PCS_RGMII && + priv->hw->pcs != STMMAC_PCS_TBI && + priv->hw->pcs != STMMAC_PCS_RTBI) + stmmac_mdio_unregister(ndev); +error_napi_register: netif_napi_del(&priv->napi); error_hw_init: clk_disable_unprepare(priv->pclk); -- GitLab From 9b4a34ff89878abbab4e22efaf7c2c683664304f Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 28 Dec 2016 14:54:47 +0200 Subject: [PATCH 0422/1442] net/sched: cls_flower: Fix missing addr_type in classify [ Upstream commit 0df0f207aab4f42e5c96a807adf9a6845b69e984 ] Since we now use a non zero mask on addr_type, we are matching on its value (IPV4/IPV6). So before this fix, matching on enc_src_ip/enc_dst_ip failed in SW/classify path since its value was zero. This patch sets the proper value of addr_type for encapsulated packets. Fixes: 970bfcd09791 ('net/sched: cls_flower: Use mask for addr_type') Signed-off-by: Paul Blakey Reviewed-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_flower.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 904442421db3..eee299bb6bcf 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -149,10 +149,14 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, switch (ip_tunnel_info_af(info)) { case AF_INET: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV4_ADDRS; skb_key.enc_ipv4.src = key->u.ipv4.src; skb_key.enc_ipv4.dst = key->u.ipv4.dst; break; case AF_INET6: + skb_key.enc_control.addr_type = + FLOW_DISSECTOR_KEY_IPV6_ADDRS; skb_key.enc_ipv6.src = key->u.ipv6.src; skb_key.enc_ipv6.dst = key->u.ipv6.dst; break; -- GitLab From 7bf1de7f2749ad858c33d3133240d5c3df046658 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 28 Dec 2016 14:58:32 +0200 Subject: [PATCH 0423/1442] net/mlx5: Check FW limitations on log_max_qp before setting it [ Upstream commit 883371c453b937f9eb581fb4915210865982736f ] When setting HCA capabilities, set log_max_qp to be the minimum between the selected profile's value and the HCA limitation. Fixes: 938fe83c8dcb ('net/mlx5_core: New device capabilities...') Signed-off-by: Noa Osherovich Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ada24e103b02..332769cde9f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -468,6 +468,13 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, to_fw_pkey_sz(dev, 128)); + /* Check log_max_qp from HCA caps to set in current profile */ + if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) { + mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", + profile[prof_sel].log_max_qp, + MLX5_CAP_GEN_MAX(dev, log_max_qp)); + profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, prof->log_max_qp); -- GitLab From ca8a64467f2a20f86218c483c03fb7fc8c33c156 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 28 Dec 2016 14:58:33 +0200 Subject: [PATCH 0424/1442] net/mlx5: Cancel recovery work in remove flow [ Upstream commit 689a248df83b6032edc57e86267b4e5cc8d7174e ] If there is pending delayed work for health recovery it must be canceled if the device is being unloaded. Fixes: 05ac2c0b7438 ("net/mlx5: Fix race between PCI error handlers and health work") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 332769cde9f4..15b7e600bf9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1159,6 +1159,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, { int err = 0; + mlx5_drain_health_wq(dev); + mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1319,10 +1321,9 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain health wq */ + /* In case of kernel call save the pci state */ if (state) { pci_save_state(pdev); - mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } -- GitLab From efbbc75c00fc15598cd863dfe117dfa976e8b971 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 28 Dec 2016 14:58:34 +0200 Subject: [PATCH 0425/1442] net/mlx5: Avoid shadowing numa_node [ Upstream commit d151d73dcc99de87c63bdefebcc4cb69de1cdc40 ] Avoid using a local variable named numa_node to avoid shadowing a public one. Fixes: db058a186f98 ('net/mlx5_core: Set irq affinity hints') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 15b7e600bf9b..92bd13ddc39d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -547,7 +547,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int numa_node = priv->numa_node; int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { @@ -555,7 +554,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) return -ENOMEM; } - cpumask_set_cpu(cpumask_local_spread(i, numa_node), + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); -- GitLab From b22c86ff8e78a74f4b1a531f671beb97278b138d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 28 Dec 2016 14:58:35 +0200 Subject: [PATCH 0426/1442] net/mlx5: Mask destination mac value in ethtool steering rules [ Upstream commit 077b1e8069b9b74477b01d28f6b83774dc19a142 ] We need to mask the destination mac value with the destination mac mask when adding steering rule via ethtool. Fixes: 1174fce8d1410 ('net/mlx5e: Support l3/l4 flow type specs in ethtool flow steering') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d17c24227900..90e81ae9f3bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -247,6 +247,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, } if (fs->flow_type & FLOW_MAC_EXT && !is_zero_ether_addr(fs->m_ext.h_dest)) { + mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), fs->m_ext.h_dest); -- GitLab From 33c782dd15145746475758a8052bd698ae1a6e90 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Wed, 28 Dec 2016 14:58:37 +0200 Subject: [PATCH 0427/1442] net/mlx5: Prevent setting multicast macs for VFs [ Upstream commit ccce1700263d8b5b219359d04180492a726cea16 ] Need to check that VF mac address entered by the admin user is either zero or unicast mac. Multicast mac addresses are prohibited. Fixes: 77256579c6b4 ('net/mlx5: E-Switch, Introduce Vport administration functions') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index be1f7333ab7f..c7011ef4e351 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1703,7 +1703,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!ESW_ALLOWED(esw)) return -EPERM; - if (!LEGAL_VPORT(esw, vport)) + if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; mutex_lock(&esw->state_lock); -- GitLab From ee9f2fd3f6b686ae6b04fc0c432984e10eb74006 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 28 Dec 2016 14:58:41 +0200 Subject: [PATCH 0428/1442] net/mlx5e: Don't sync netdev state when not registered [ Upstream commit 610e89e05c3f28a7394935aa6b91f99548c4fd3c ] Skip setting netdev vxlan ports and netdev rx_mode on driver load when netdev is not yet registered. Synchronizing with netdev state is needed only on reset flow where the netdev remains registered for the whole reset period. This also fixes an access before initialization of net_device.addr_list_lock - which for some reason initialized on register_netdev - where we queued set_rx_mode work on driver load before netdev registration. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Saeed Mahameed Reported-by: Sebastian Ott Reviewed-by: Mohamad Haj Yahia Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 246d98ebb588..307b2702832f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3773,14 +3773,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); - if (mlx5e_vxlan_allowed(mdev)) { - rtnl_lock(); - udp_tunnel_get_rx_info(netdev); - rtnl_unlock(); - } - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); if (MLX5_CAP_GEN(mdev, vport_group_manager)) { mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); @@ -3790,6 +3783,18 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) rep.priv_data = priv; mlx5_eswitch_register_vport_rep(esw, 0, &rep); } + + if (netdev->reg_state != NETREG_REGISTERED) + return; + + /* Device already registered: sync netdev system state */ + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); + } + + queue_work(priv->wq, &priv->set_rx_mode_work); } static void mlx5e_nic_disable(struct mlx5e_priv *priv) -- GitLab From 8cb7d6277f01327d0503cd679afa9b09e76f2c97 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 28 Dec 2016 14:58:42 +0200 Subject: [PATCH 0429/1442] net/mlx5e: Disable netdev after close [ Upstream commit 37f304d10030bb425c19099e7b955d9c3ec4cba3 ] Disable netdev should come after it was closed, although no harm of doing it before -hence the MLX5E_STATE_DESTROYING bit- but it is more natural this way. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Saeed Mahameed Reviewed-by: Mohamad Haj Yahia Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 307b2702832f..5dc3e2453ff5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3942,10 +3942,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); - if (profile->disable) - profile->disable(priv); - - flush_workqueue(priv->wq); rtnl_lock(); if (netif_running(netdev)) @@ -3953,6 +3949,10 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) netif_device_detach(netdev); rtnl_unlock(); + if (profile->disable) + profile->disable(priv); + flush_workqueue(priv->wq); + mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); -- GitLab From 2ffc694b572729d5f3e450a3b7d953eb88f4d550 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 28 Dec 2016 17:52:15 +0100 Subject: [PATCH 0430/1442] rtnl: stats - add missing netlink message size checks [ Upstream commit 4775cc1f2d5abca894ac32774eefc22c45347d1c ] We miss to check if the netlink message is actually big enough to contain a struct if_stats_msg. Add a check to prevent userland from sending us short messages that would make us access memory beyond the end of the message. Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump...") Signed-off-by: Mathias Krause Cc: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a6196cf844f6..b7f9ae7b1c5f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3886,6 +3886,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) u32 filter_mask; int err; + if (nlmsg_len(nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); @@ -3935,6 +3938,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; + if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) + return -EINVAL; + ifsm = nlmsg_data(cb->nlh); filter_mask = ifsm->filter_mask; if (!filter_mask) -- GitLab From e7422080e35d1faff66016f4163a34b09de78815 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Thu, 29 Dec 2016 16:45:04 +0800 Subject: [PATCH 0431/1442] net: fix incorrect original ingress device index in PKTINFO [ Upstream commit f0c16ba8933ed217c2688b277410b2a37ba81591 ] When we send a packet for our own local address on a non-loopback interface (e.g. eth0), due to the change had been introduced from commit 0b922b7a829c ("net: original ingress device index in PKTINFO"), the original ingress device index would be set as the loopback interface. However, the packet should be considered as if it is being arrived via the sending interface (eth0), otherwise it would break the expectation of the userspace application (e.g. the DHCPRELEASE message from dhcp_release binary would be ignored by the dnsmasq daemon, since it come from lo which is not the interface dnsmasq bind to) Fixes: 0b922b7a829c ("net: original ingress device index in PKTINFO") Acked-by: David Ahern Signed-off-by: Wei Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e869773e3979..f226f4086e05 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1202,8 +1202,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * which has interface index (iif) as the first member of the * underlying inet{6}_skb_parm struct. This code then overlays * PKTINFO_SKB_CB and in_pktinfo also has iif as the first - * element so the iif is picked up from the prior IPCB + * element so the iif is picked up from the prior IPCB. If iif + * is the loopback interface, then return the sending interface + * (e.g., process binds socket to eth0 for Tx which is + * redirected to loopback in the rtable/dst). */ + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) + pktinfo->ipi_ifindex = inet_iif(skb); + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; -- GitLab From a8a213f296ae0287e2f46a251e10a88f2c30ba61 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 29 Dec 2016 15:29:03 -0800 Subject: [PATCH 0432/1442] net: ipv4: dst for local input routes should use l3mdev if relevant [ Upstream commit f5a0aab84b74de68523599817569c057c7ac1622 ] IPv4 output routes already use l3mdev device instead of loopback for dst's if it is applicable. Change local input routes to do the same. This fixes icmp responses for unreachable UDP ports which are directed to the wrong table after commit 9d1a6c4ea43e4 because local_input routes use the loopback device. Moving from ingress device to loopback loses the L3 domain causing responses based on the dst to get to lost. Fixes: 9d1a6c4ea43e4 ("net: icmp_route_lookup should use rt dev to determine L3 domain") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2a57566e6e91..8197b06d9aaa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1902,7 +1902,8 @@ out: return err; } } - rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, + rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; -- GitLab From 9f7551e05b0f59c09bf0ef8277e3364449feacb6 Mon Sep 17 00:00:00 2001 From: Reiter Wolfgang Date: Sat, 31 Dec 2016 21:11:57 +0100 Subject: [PATCH 0433/1442] drop_monitor: add missing call to genlmsg_end [ Upstream commit 4200462d88f47f3759bdf4705f87e207b0f5b2e4 ] Update nlmsg_len field with genlmsg_end to enable userspace processing using nlmsg_next helper. Also adds error handling. Signed-off-by: Reiter Wolfgang Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/drop_monitor.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 72cfb0c61125..5de61aaad586 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -80,6 +80,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) struct nlattr *nla; struct sk_buff *skb; unsigned long flags; + void *msg_header; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); @@ -87,17 +88,31 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) skb = genlmsg_new(al, GFP_KERNEL); - if (skb) { - genlmsg_put(skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(skb, NLA_UNSPEC, - sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - } else { - mod_timer(&data->send_timer, jiffies + HZ / 10); + if (!skb) + goto err; + + msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + if (!msg_header) { + nlmsg_free(skb); + skb = NULL; + goto err; + } + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + if (!nla) { + nlmsg_free(skb); + skb = NULL; + goto err; } + msg = nla_data(nla); + memset(msg, 0, al); + genlmsg_end(skb, msg_header); + goto out; +err: + mod_timer(&data->send_timer, jiffies + HZ / 10); +out: spin_lock_irqsave(&data->lock, flags); swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); -- GitLab From 9f65f5d4746bdfe274a5f6ae7ce99c2441c8d286 Mon Sep 17 00:00:00 2001 From: Reiter Wolfgang Date: Tue, 3 Jan 2017 01:39:10 +0100 Subject: [PATCH 0434/1442] drop_monitor: consider inserted data in genlmsg_end [ Upstream commit 3b48ab2248e61408910e792fe84d6ec466084c1a ] Final nlmsg_len field update must reflect inserted net_dm_drop_point data. This patch depends on previous patch: "drop_monitor: add missing call to genlmsg_end" Signed-off-by: Reiter Wolfgang Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/drop_monitor.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 5de61aaad586..ca2c9c8b9a3e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -107,7 +107,6 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) } msg = nla_data(nla); memset(msg, 0, al); - genlmsg_end(skb, msg_header); goto out; err: @@ -117,6 +116,13 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) swap(data->skb, skb); spin_unlock_irqrestore(&data->lock, flags); + if (skb) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); + + genlmsg_end(skb, genlmsg_data(gnlh)); + } + return skb; } -- GitLab From 7826d11cf44c53ff294e20ffe4365c068089054a Mon Sep 17 00:00:00 2001 From: Ian Kumlien Date: Mon, 2 Jan 2017 09:18:35 +0100 Subject: [PATCH 0435/1442] flow_dissector: Update pptp handling to avoid null pointer deref. [ Upstream commit d0af683407a26a4437d8fa6e283ea201f2ae8146 ] __skb_flow_dissect can be called with a skb or a data packet, either can be NULL. All calls seems to have been moved to __skb_header_pointer except the pptp handling which is still calling skb_header_pointer. skb_header_pointer will use skb->data and thus: [ 109.556866] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 [ 109.557102] IP: [] __skb_flow_dissect+0xa88/0xce0 [ 109.557263] PGD 0 [ 109.557338] [ 109.557484] Oops: 0000 [#1] SMP [ 109.557562] Modules linked in: chaoskey [ 109.557783] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.9.0 #79 [ 109.557867] Hardware name: Supermicro A1SRM-LN7F/LN5F/A1SRM-LN7F-2758, BIOS 1.0c 11/04/2015 [ 109.557957] task: ffff94085c27bc00 task.stack: ffffb745c0068000 [ 109.558041] RIP: 0010:[] [] __skb_flow_dissect+0xa88/0xce0 [ 109.558203] RSP: 0018:ffff94087fc83d40 EFLAGS: 00010206 [ 109.558286] RAX: 0000000000000130 RBX: ffffffff8975bf80 RCX: ffff94084fab6800 [ 109.558373] RDX: 0000000000000010 RSI: 000000000000000c RDI: 0000000000000000 [ 109.558460] RBP: 0000000000000b88 R08: 0000000000000000 R09: 0000000000000022 [ 109.558547] R10: 0000000000000008 R11: ffff94087fc83e04 R12: 0000000000000000 [ 109.558763] R13: ffff94084fab6800 R14: ffff94087fc83e04 R15: 000000000000002f [ 109.558979] FS: 0000000000000000(0000) GS:ffff94087fc80000(0000) knlGS:0000000000000000 [ 109.559326] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 109.559539] CR2: 0000000000000080 CR3: 0000000281809000 CR4: 00000000001026e0 [ 109.559753] Stack: [ 109.559957] 000000000000000c ffff94084fab6822 0000000000000001 ffff94085c2b5fc0 [ 109.560578] 0000000000000001 0000000000002000 0000000000000000 0000000000000000 [ 109.561200] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 109.561820] Call Trace: [ 109.562027] [ 109.562108] [] ? eth_get_headlen+0x7a/0xf0 [ 109.562522] [] ? igb_poll+0x96a/0xe80 [ 109.562737] [] ? net_rx_action+0x20b/0x350 [ 109.562953] [] ? __do_softirq+0xe8/0x280 [ 109.563169] [] ? irq_exit+0xaa/0xb0 [ 109.563382] [] ? do_IRQ+0x4b/0xc0 [ 109.563597] [] ? common_interrupt+0x7f/0x7f [ 109.563810] [ 109.563890] [] ? cpuidle_enter_state+0x130/0x2c0 [ 109.564304] [] ? cpuidle_enter_state+0x120/0x2c0 [ 109.564520] [] ? cpu_startup_entry+0x19f/0x1f0 [ 109.564737] [] ? start_secondary+0x12a/0x140 [ 109.564950] Code: 83 e2 20 a8 80 0f 84 60 01 00 00 c7 04 24 08 00 00 00 66 85 d2 0f 84 be fe ff ff e9 69 fe ff ff 8b 34 24 89 f2 83 c2 04 66 85 c0 <41> 8b 84 24 80 00 00 00 0f 49 d6 41 8d 31 01 d6 41 2b 84 24 84 [ 109.569959] RIP [] __skb_flow_dissect+0xa88/0xce0 [ 109.570245] RSP [ 109.570453] CR2: 0000000000000080 Fixes: ab10dccb1160 ("rps: Inspect PPTP encapsulated by GRE to get flow hash") Signed-off-by: Ian Kumlien Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c6d8207ffa7e..32e4e0158846 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -445,8 +445,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, if (hdr->flags & GRE_ACK) offset += sizeof(((struct pptp_gre_header *)0)->ack); - ppp_hdr = skb_header_pointer(skb, nhoff + offset, - sizeof(_ppp_hdr), _ppp_hdr); + ppp_hdr = __skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), + data, hlen, _ppp_hdr); if (!ppp_hdr) goto out_bad; -- GitLab From fe1e13cfe2c41bb43942555cebe248153d3aba1b Mon Sep 17 00:00:00 2001 From: Michal Tesar Date: Mon, 2 Jan 2017 14:38:36 +0100 Subject: [PATCH 0436/1442] igmp: Make igmp group member RFC 3376 compliant [ Upstream commit 7ababb782690e03b78657e27bd051e20163af2d6 ] 5.2. Action on Reception of a Query When a system receives a Query, it does not respond immediately. Instead, it delays its response by a random amount of time, bounded by the Max Resp Time value derived from the Max Resp Code in the received Query message. A system may receive a variety of Queries on different interfaces and of different kinds (e.g., General Queries, Group-Specific Queries, and Group-and-Source-Specific Queries), each of which may require its own delayed response. Before scheduling a response to a Query, the system must first consider previously scheduled pending responses and in many cases schedule a combined response. Therefore, the system must be able to maintain the following state: o A timer per interface for scheduling responses to General Queries. o A per-group and interface timer for scheduling responses to Group- Specific and Group-and-Source-Specific Queries. o A per-group and interface list of sources to be reported in the response to a Group-and-Source-Specific Query. When a new Query with the Router-Alert option arrives on an interface, provided the system has state to report, a delay for a response is randomly selected in the range (0, [Max Resp Time]) where Max Resp Time is derived from Max Resp Code in the received Query message. The following rules are then used to determine if a Report needs to be scheduled and the type of Report to schedule. The rules are considered in order and only the first matching rule is applied. 1. If there is a pending response to a previous General Query scheduled sooner than the selected delay, no additional response needs to be scheduled. 2. If the received Query is a General Query, the interface timer is used to schedule a response to the General Query after the selected delay. Any previously pending response to a General Query is canceled. --8<-- Currently the timer is rearmed with new random expiration time for every incoming query regardless of possibly already pending report. Which is not aligned with the above RFE. It also might happen that higher rate of incoming queries can postpone the report after the expiration time of the first query causing group membership loss. Now the per interface general query timer is rearmed only when there is no pending report already scheduled on that interface or the newly selected expiration time is before the already pending scheduled report. Signed-off-by: Michal Tesar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 15db786d50ed..32a08bc010bf 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -219,9 +219,14 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) static void igmp_gq_start_timer(struct in_device *in_dev) { int tv = prandom_u32() % in_dev->mr_maxdelay; + unsigned long exp = jiffies + tv + 2; + + if (in_dev->mr_gq_running && + time_after_eq(exp, (in_dev->mr_gq_timer).expires)) + return; in_dev->mr_gq_running = 1; - if (!mod_timer(&in_dev->mr_gq_timer, jiffies+tv+2)) + if (!mod_timer(&in_dev->mr_gq_timer, exp)) in_dev_hold(in_dev); } -- GitLab From efc455f08ea8c31db60e3a9b307f6abb8ef63ede Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 2 Jan 2017 13:32:54 -0800 Subject: [PATCH 0437/1442] ipv4: Do not allow MAIN to be alias for new LOCAL w/ custom rules [ Upstream commit 5350d54f6cd12eaff623e890744c79b700bd3f17 ] In the case of custom rules being present we need to handle the case of the LOCAL table being intialized after the new rule has been added. To address that I am adding a new check so that we can make certain we don't use an alias of MAIN for LOCAL when allocating a new table. Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") Reported-by: Oliver Brunel Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 161fc0f0d752..3e4f183fc241 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -85,7 +85,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - if (id == RT_TABLE_LOCAL) + if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); tb = fib_trie_table(id, alias); -- GitLab From a4d205a595217a45450ad402cbc5326c37dc0359 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Jan 2017 09:37:55 -0800 Subject: [PATCH 0438/1442] net: vrf: Add missing Rx counters [ Upstream commit 926d93a33e59b2729afdbad357233c17184de9d2 ] The move from rx-handler to L3 receive handler inadvertantly dropped the rx counters. Restore them. Fixes: 74b20582ac38 ("net: l3mdev: Add hook in ip and ipv6") Reported-by: Dinesh Dutt Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3cb35881dfda..809a7968de01 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -968,6 +968,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, */ need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); if (!ipv6_ndisc_frame(skb) && !need_strict) { + vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1009,6 +1010,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, goto out; } + vrf_rx_stats(vrf_dev, skb->len); + skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); -- GitLab From b55f6ca7380d609a40e0441d345404d100da3d45 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 7 Jan 2017 00:26:33 +0100 Subject: [PATCH 0439/1442] bpf: change back to orig prog on too many passes [ Upstream commit 9d5ecb09d525469abd1a10c096cb5a17206523f2 ] If after too many passes still no image could be emitted, then swap back to the original program as we do in all other cases and don't use the one with blinding. Fixes: 959a75791603 ("bpf, x86: add support for constant blinding") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fe04a04dab8e..15f743615923 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1172,6 +1172,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; prog->jited = 1; + } else { + prog = orig_prog; } out_addrs: -- GitLab From ac77aab46168435a88911f5a3e1b9e0c246d3c14 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 7 Jan 2017 21:01:56 -0800 Subject: [PATCH 0440/1442] net: dsa: bcm_sf2: Do not clobber b53_switch_ops [ Upstream commit a4c61b92b3a4cbda35bb0251a5063a68f0861b2c ] We make the bcm_sf2 driver override ds->ops which points to b53_switch_ops since b53_switch_alloc() did the assignent. This is all well and good until a second b53 switch comes in, and ends up using the bcm_sf2 operations. Make a proper local copy, substitute the ds->ops pointer and then override the operations. Fixes: f458995b9ad8 ("net: dsa: bcm_sf2: Utilize core B53 driver when possible") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 9ec33b51a0ed..2f9f910c0e40 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -982,6 +982,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; struct device_node *dn = pdev->dev.of_node; struct b53_platform_data *pdata; + struct dsa_switch_ops *ops; struct bcm_sf2_priv *priv; struct b53_device *dev; struct dsa_switch *ds; @@ -995,6 +996,10 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; + ops = devm_kzalloc(&pdev->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv); if (!dev) return -ENOMEM; @@ -1014,6 +1019,8 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) ds = dev->ds; /* Override the parts that are non-standard wrt. normal b53 devices */ + memcpy(ops, ds->ops, sizeof(*ops)); + ds->ops = ops; ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol; ds->ops->setup = bcm_sf2_sw_setup; ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags; -- GitLab From 294f2c8896370e0ce6c258df140fafe8fc43a857 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 7 Jan 2017 21:01:57 -0800 Subject: [PATCH 0441/1442] net: dsa: bcm_sf2: Utilize nested MDIO read/write [ Upstream commit 2cfe8f8290bd28cf1ee67db914a6e76cf8e6437b ] We are implementing a MDIO bus which is behind another one, so use the nested version of the accessors to get lockdep annotations correct. Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 2f9f910c0e40..2ce7ae97ac91 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -393,7 +393,7 @@ static int bcm_sf2_sw_mdio_read(struct mii_bus *bus, int addr, int regnum) if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) return bcm_sf2_sw_indir_rw(priv, 1, addr, regnum, 0); else - return mdiobus_read(priv->master_mii_bus, addr, regnum); + return mdiobus_read_nested(priv->master_mii_bus, addr, regnum); } static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum, @@ -407,7 +407,7 @@ static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum, if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); else - mdiobus_write(priv->master_mii_bus, addr, regnum, val); + mdiobus_write_nested(priv->master_mii_bus, addr, regnum, val); return 0; } -- GitLab From c8a89b4f5248e8d6742317b674010bdfeb31e06e Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 10 Jan 2017 17:04:06 +0800 Subject: [PATCH 0442/1442] r8152: split rtl8152_suspend function [ Upstream commit 8fb280616878b81c0790a0c33acbeec59c5711f4 ] Split rtl8152_suspend() into rtl8152_system_suspend() and rtl8152_rumtime_suspend(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 57 +++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index efb84f092492..1d1fc379eeaf 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3576,39 +3576,62 @@ static bool delay_autosuspend(struct r8152 *tp) return false; } -static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) +static int rtl8152_rumtime_suspend(struct r8152 *tp) { - struct r8152 *tp = usb_get_intfdata(intf); struct net_device *netdev = tp->netdev; int ret = 0; - mutex_lock(&tp->control); - - if (PMSG_IS_AUTO(message)) { - if (netif_running(netdev) && delay_autosuspend(tp)) { + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { + if (delay_autosuspend(tp)) { ret = -EBUSY; goto out1; } - set_bit(SELECTIVE_SUSPEND, &tp->flags); - } else { - netif_device_detach(netdev); + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); + napi_disable(&tp->napi); + rtl_stop_rx(tp); + tp->rtl_ops.autosuspend_en(tp, true); + napi_enable(&tp->napi); } + set_bit(SELECTIVE_SUSPEND, &tp->flags); + +out1: + return ret; +} + +static int rtl8152_system_suspend(struct r8152 *tp) +{ + struct net_device *netdev = tp->netdev; + int ret = 0; + + netif_device_detach(netdev); + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); napi_disable(&tp->napi); - if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { - rtl_stop_rx(tp); - tp->rtl_ops.autosuspend_en(tp, true); - } else { - cancel_delayed_work_sync(&tp->schedule); - tp->rtl_ops.down(tp); - } + cancel_delayed_work_sync(&tp->schedule); + tp->rtl_ops.down(tp); napi_enable(&tp->napi); } -out1: + + return ret; +} + +static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct r8152 *tp = usb_get_intfdata(intf); + int ret; + + mutex_lock(&tp->control); + + if (PMSG_IS_AUTO(message)) + ret = rtl8152_rumtime_suspend(tp); + else + ret = rtl8152_system_suspend(tp); + mutex_unlock(&tp->control); return ret; -- GitLab From 66f24d624baaa9934c3288cd01d2b6dd65a399d3 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 10 Jan 2017 17:04:07 +0800 Subject: [PATCH 0443/1442] r8152: fix rx issue for runtime suspend [ Upstream commit 75dc692eda114cb234a46cb11893a9c3ea520934 ] Pause the rx and make sure the rx fifo is empty when the autosuspend occurs. If the rx data comes when the driver is canceling the rx urb, the host controller would stop getting the data from the device and continue it after next rx urb is submitted. That is, one continuing data is split into two different urb buffers. That let the driver take the data as a rx descriptor, and unexpected behavior happens. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 1d1fc379eeaf..4b5cb162442b 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3582,17 +3582,42 @@ static int rtl8152_rumtime_suspend(struct r8152 *tp) int ret = 0; if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { + u32 rcr = 0; + if (delay_autosuspend(tp)) { ret = -EBUSY; goto out1; } + if (netif_carrier_ok(netdev)) { + u32 ocp_data; + + rcr = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); + ocp_data = rcr & ~RCR_ACPT_ALL; + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + rxdy_gated_en(tp, true); + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, + PLA_OOB_CTRL); + if (!(ocp_data & RXFIFO_EMPTY)) { + rxdy_gated_en(tp, false); + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); + ret = -EBUSY; + goto out1; + } + } + clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); - napi_disable(&tp->napi); - rtl_stop_rx(tp); + tp->rtl_ops.autosuspend_en(tp, true); - napi_enable(&tp->napi); + + if (netif_carrier_ok(netdev)) { + napi_disable(&tp->napi); + rtl_stop_rx(tp); + rxdy_gated_en(tp, false); + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); + napi_enable(&tp->napi); + } } set_bit(SELECTIVE_SUSPEND, &tp->flags); -- GitLab From 2ff4a0243c9e7b16c4fa1e9b7ab08525f4b11a40 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Jan 2017 11:58:34 -0800 Subject: [PATCH 0444/1442] net: dsa: Ensure validity of dst->ds[0] [ Upstream commit faf3a932fbeb77860226a8323eacb835edc98648 ] It is perfectly possible to have non zero indexed switches being present in a DSA switch tree, in such a case, we will be deferencing a NULL pointer while dsa_cpu_port_ethtool_{setup,restore}. Be more defensive and ensure that dst->ds[0] is valid before doing anything with it. Fixes: 0c73c523cf73 ("net: dsa: Initialize CPU port ethtool ops per tree") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/dsa2.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 5fff951a0a49..da3862124545 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -394,9 +394,11 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - err = dsa_cpu_port_ethtool_setup(dst->ds[0]); - if (err) - return err; + if (dst->ds[0]) { + err = dsa_cpu_port_ethtool_setup(dst->ds[0]); + if (err) + return err; + } /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get @@ -433,7 +435,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - dsa_cpu_port_ethtool_restore(dst->ds[0]); + if (dst->ds[0]) + dsa_cpu_port_ethtool_restore(dst->ds[0]); pr_info("DSA: tree %d unapplied\n", dst->tree); dst->applied = false; -- GitLab From 33364eee1fe4b1bdff89312645ee3a624858dbc9 Mon Sep 17 00:00:00 2001 From: "Anna, Suman" Date: Mon, 9 Jan 2017 21:48:56 -0600 Subject: [PATCH 0445/1442] net: add the AF_QIPCRTR entries to family name tables [ Upstream commit 5d722b3024f6762addb8642ffddc9f275b5107ae ] Commit bdabad3e363d ("net: Add Qualcomm IPC router") introduced a new address family. Update the family name tables accordingly so that the lockdep initialization can use the proper names for this family. Cc: Courtney Cavin Cc: Bjorn Andersson Signed-off-by: Suman Anna Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 00a074dbfe9b..bc6543f7de36 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -222,7 +222,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , - "sk_lock-AF_MAX" + "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -239,7 +239,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , - "slock-AF_MAX" + "slock-AF_QIPCRTR", "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -256,7 +256,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , - "clock-AF_MAX" + "clock-AF_QIPCRTR", "clock-AF_MAX" }; /* -- GitLab From ec0fdcb88c6f451dc3d436406de69fbf48a2e1da Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2017 12:24:01 -0800 Subject: [PATCH 0446/1442] gro: Enter slow-path if there is no tailroom [ Upstream commit 1272ce87fa017ca4cf32920764d879656b7a005a ] The GRO path has a fast-path where we avoid calling pskb_may_pull and pskb_expand by directly accessing frag0. However, this should only be done if we have enough tailroom in the skb as otherwise we'll have to expand it later anyway. This patch adds the check by capping frag0_len with the skb tailroom. Fixes: cb18978cbf45 ("gro: Open-code final pskb_may_pull") Reported-by: Slava Shwartsman Signed-off-by: Herbert Xu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6666b28b6815..7b4fa1e4e4d8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4453,7 +4453,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb) pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0), + skb->end - skb->tail); } } -- GitLab From 934ca017c85083ee25062f4d92935f6edc336bd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Jan 2017 19:52:43 -0800 Subject: [PATCH 0447/1442] gro: use min_t() in skb_gro_reset_offset() [ Upstream commit 7cfd5fd5a9813f1430290d20c0fead9b4582a307 ] On 32bit arches, (skb->end - skb->data) is not 'unsigned int', so we shall use min_t() instead of min() to avoid a compiler error. Fixes: 1272ce87fa01 ("gro: Enter slow-path if there is no tailroom") Reported-by: kernel test robot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7b4fa1e4e4d8..e1d731fdc72c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4453,8 +4453,9 @@ static void skb_gro_reset_offset(struct sk_buff *skb) pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0), - skb->end - skb->tail); + NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, + skb_frag_size(frag0), + skb->end - skb->tail); } } -- GitLab From 17a561b19a274448c99b49cd0eb148e5890576ce Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2017 12:24:15 -0800 Subject: [PATCH 0448/1442] gro: Disable frag0 optimization on IPv6 ext headers [ Upstream commit 57ea52a865144aedbcd619ee0081155e658b6f7d ] The GRO fast path caches the frag0 address. This address becomes invalid if frag0 is modified by pskb_may_pull or its variants. So whenever that happens we must disable the frag0 optimization. This is usually done through the combination of gro_header_hard and gro_header_slow, however, the IPv6 extension header path did the pulling directly and would continue to use the GRO fast path incorrectly. This patch fixes it by disabling the fast path when we enter the IPv6 extension header path. Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address") Reported-by: Slava Shwartsman Signed-off-by: Herbert Xu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 9 +++++++-- net/ipv6/ip6_offload.c | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e16a2a980ea8..d83590ef74a1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2502,14 +2502,19 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) return NAPI_GRO_CB(skb)->frag0_len < hlen; } +static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +} + static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { if (!pskb_may_pull(skb, hlen)) return NULL; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + skb_gro_frag0_invalidate(skb); return skb->data + offset; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 89c59e656f44..fc7b4017ba24 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -191,6 +191,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); + skb_gro_frag0_invalidate(skb); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); skb_reset_transport_header(skb); -- GitLab From 7cc73483a4c7f0a2ac71b590ca35877f43bf0f2d Mon Sep 17 00:00:00 2001 From: Gil Rockah Date: Tue, 10 Jan 2017 22:33:38 +0200 Subject: [PATCH 0449/1442] net/mlx5e: Remove WARN_ONCE from adaptive moderation code [ Upstream commit 0bbcc0a8fc394d01988fe0263ccf7fddb77a12c3 ] When trying to do interface down or changing interface configuration under heavy traffic, some of the adaptive moderation corner cases can occur and leave a WARN_ONCE call trace in the kernel log. Those WARN_ONCE are meant for debug only, and should have been inserted only under debug. We avoid such call traces by removing those WARN_ONCE. Fixes: cb3c7fd4f839 ("net/mlx5e: Support adaptive RX coalescing") Signed-off-by: Gil Rockah Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index 1fffe48a93cc..cbfac06b7ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -109,7 +109,6 @@ static bool mlx5e_am_on_top(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n"); return true; case MLX5E_AM_GOING_RIGHT: return (am->steps_left > 1) && (am->steps_right == 1); @@ -123,7 +122,6 @@ static void mlx5e_am_turn(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_turn: PARKING\n"); break; case MLX5E_AM_GOING_RIGHT: am->tune_state = MLX5E_AM_GOING_LEFT; @@ -144,7 +142,6 @@ static int mlx5e_am_step(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_step: PARKING\n"); break; case MLX5E_AM_GOING_RIGHT: if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1)) @@ -282,10 +279,8 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, u32 delta_us = ktime_us_delta(end->time, start->time); unsigned int npkts = end->pkt_ctr - start->pkt_ctr; - if (!delta_us) { - WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n"); + if (!delta_us) return; - } curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; -- GitLab From 7b7a5a85b1d90efbad5b5c9efe8a06e13832ec01 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 10 Jan 2017 14:37:35 -0800 Subject: [PATCH 0450/1442] net: ipv4: Fix multipath selection with vrf [ Upstream commit 7a18c5b9fb31a999afc62b0e60978aa896fc89e9 ] fib_select_path does not call fib_select_multipath if oif is set in the flow struct. For VRF use cases oif is always set, so multipath route selection is bypassed. Use the FLOWI_FLAG_SKIP_NH_OIF to skip the oif check similar to what is done in fib_table_lookup. Add saddr and proto to the flow struct for the fib lookup done by the VRF driver to better match hash computation for a flow. Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 2 ++ net/ipv4/fib_semantics.c | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 809a7968de01..12b8085430e8 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -263,7 +263,9 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, + .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, + .saddr = ip4h->saddr, }; struct net *net = dev_net(vrf_dev); struct rtable *rt; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 388d3e21629b..a8508b79b406 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1617,8 +1617,13 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, int mp_hash) { + bool oif_check; + + oif_check = (fl4->flowi4_oif == 0 || + fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { + if (res->fi->fib_nhs > 1 && oif_check) { if (mp_hash < 0) mp_hash = get_hash_from_flowi4(fl4) >> 1; @@ -1628,7 +1633,7 @@ void fib_select_path(struct net *net, struct fib_result *res, #endif if (!res->prefixlen && res->table->tb_num_default > 1 && - res->type == RTN_UNICAST && !fl4->flowi4_oif) + res->type == RTN_UNICAST && oif_check) fib_select_default(fl4, res); if (!fl4->saddr) -- GitLab From e425ed1d3c75295a3578df1a8b0314a83d9a5ec8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 10 Jan 2017 15:22:25 -0800 Subject: [PATCH 0451/1442] net: vrf: do not allow table id 0 [ Upstream commit 24c63bbc18e25d5d8439422aa5fd2d66390b88eb ] Frank reported that vrf devices can be created with a table id of 0. This breaks many of the run time table id checks and should not be allowed. Detect this condition at create time and fail with EINVAL. Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Reported-by: Frank Kellermann Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 12b8085430e8..95cf1d844781 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1239,6 +1239,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]); + if (vrf->tb_id == RT_TABLE_UNSPEC) + return -EINVAL; dev->priv_flags |= IFF_L3MDEV_MASTER; -- GitLab From 2c867216c555f5897b327daed6240bfb9e489c97 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 Jan 2017 15:33:36 +0100 Subject: [PATCH 0452/1442] HID: hid-cypress: validate length of report commit 1ebb71143758f45dc0fa76e2f48429e13b16d110 upstream. Make sure we have enough of a report structure to validate before looking at it. Reported-by: Benoit Camredon Tested-by: Benoit Camredon Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jiri Kosina --- drivers/hid/hid-cypress.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 1b764d1745f3..1689568b597d 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -39,6 +39,9 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) return rdesc; + if (*rsize < 4) + return rdesc; + for (i = 0; i < *rsize - 4; i++) if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) { rdesc[i] = 0x19; -- GitLab From 2fc33ff4ba817484a5307a4b5ef352d20d7af746 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 3 Jan 2017 11:58:33 +0900 Subject: [PATCH 0453/1442] ALSA: firewire-tascam: Fix to handle error from initialization of stream data commit 6a2a2f45560a9cb7bc49820883b042e44f83726c upstream. This module has a bug not to return error code in a case that data structure for transmitted packets fails to be initialized. This commit fixes the bug. Fixes: 35efa5c489de ("ALSA: firewire-tascam: add streaming functionality") Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/tascam/tascam-stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/tascam/tascam-stream.c b/sound/firewire/tascam/tascam-stream.c index 4ad3bd7fd445..f1657a4e0621 100644 --- a/sound/firewire/tascam/tascam-stream.c +++ b/sound/firewire/tascam/tascam-stream.c @@ -343,7 +343,7 @@ int snd_tscm_stream_init_duplex(struct snd_tscm *tscm) if (err < 0) amdtp_stream_destroy(&tscm->rx_stream); - return 0; + return err; } /* At bus reset, streaming is stopped and some registers are clear. */ -- GitLab From e7d05ec1923e59a976d1095108a44a5f0533a0fe Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 22 Dec 2016 21:06:53 -0600 Subject: [PATCH 0454/1442] powerpc: Fix build warning on 32-bit PPC commit 8ae679c4bc2ea2d16d92620da8e3e9332fa4039f upstream. I am getting the following warning when I build kernel 4.9-git on my PowerBook G4 with a 32-bit PPC processor: AS arch/powerpc/kernel/misc_32.o arch/powerpc/kernel/misc_32.S:299:7: warning: "CONFIG_FSL_BOOKE" is not defined [-Wundef] This problem is evident after commit 989cea5c14be ("kbuild: prevent lib-ksyms.o rebuilds"); however, this change in kbuild only exposes an error that has been in the code since 2005 when this source file was created. That was with commit 9994a33865f4 ("powerpc: Introduce entry_{32,64}.S, misc_{32,64}.S, systbl.S"). The offending line does not make a lot of sense. This error does not seem to cause any errors in the executable, thus I am not recommending that it be applied to any stable versions. Thanks to Nicholas Piggin for suggesting this solution. Fixes: 9994a33865f4 ("powerpc: Introduce entry_{32,64}.S, misc_{32,64}.S, systbl.S") Signed-off-by: Larry Finger Cc: Nicholas Piggin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/misc_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 93cf7a5846a6..030d72df5dd5 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -296,7 +296,7 @@ _GLOBAL(flush_instruction_cache) lis r3, KERNELBASE@h iccci 0,r3 #endif -#elif CONFIG_FSL_BOOKE +#elif defined(CONFIG_FSL_BOOKE) BEGIN_FTR_SECTION mfspr r3,SPRN_L1CSR0 ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC -- GitLab From 08aed6e8883d3be7a8a1578231fc3dcafd2a9df3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Nov 2016 10:25:12 +0000 Subject: [PATCH 0455/1442] tools/virtio: fix READ_ONCE() commit 5da889c795b1fbefc9d8f058b54717ab8ab17891 upstream. The virtio tools implementation of READ_ONCE() has a single parameter called 'var', but erroneously refers to 'val' for its cast, and thus won't work unless there's a variable of the correct type that happens to be called 'var'. Fix this with s/var/val/, making READ_ONCE() work as expected regardless. Fixes: a7c490333df3cff5 ("tools/virtio: use virt_xxx barriers") Signed-off-by: Mark Rutland Cc: Jason Wang Cc: Michael S. Tsirkin Cc: linux-kernel@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- tools/virtio/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h index 845960e1cbf2..c9ccfd42ec13 100644 --- a/tools/virtio/linux/compiler.h +++ b/tools/virtio/linux/compiler.h @@ -4,6 +4,6 @@ #define WRITE_ONCE(var, val) \ (*((volatile typeof(val) *)(&(var))) = (val)) -#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var)))) +#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var)))) #endif -- GitLab From d40152d5ac671bba821f0bd2183f2a59de1ae326 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Wed, 26 Oct 2016 16:15:00 +0200 Subject: [PATCH 0456/1442] arm64: dts: mt8173: Fix auxadc node commit a3207d644fb89e5d7d5e01f00c04dcfc6d2d44d5 upstream. The devicetree node for mt8173-auxadc lacks the clock and io-channel-cells property. This leads to a non-working driver. mt6577-auxadc 11001000.auxadc: failed to get auxadc clock mt6577-auxadc: probe of 11001000.auxadc failed with error -2 Fix these fields to get the device up and running. Fixes: 748c7d4de46a ("ARM64: dts: mt8173: Add thermal/auxadc device nodes") Signed-off-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8173.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 1c71e256601d..6c03c1702bb3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -450,6 +450,9 @@ auxadc: auxadc@11001000 { compatible = "mediatek,mt8173-auxadc"; reg = <0 0x11001000 0 0x1000>; + clocks = <&pericfg CLK_PERI_AUXADC>; + clock-names = "main"; + #io-channel-cells = <1>; }; uart0: serial@11002000 { -- GitLab From e3937bc1cc0bb627e3495e7635d7bb5243ad90eb Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Fri, 11 Nov 2016 09:38:21 -0800 Subject: [PATCH 0457/1442] ARM64: dts: bcm2837-rpi-3-b: remove incorrect pwr LED commit a44e87b47148c6ee6b78509f47e6a15c0fae890a upstream. We are incorrectly defining the pwr LED, attaching it to a gpio line that is wired to the Wi-Fi SDIO module (which fails due to this). The actual power LED is connected to the GPIO expander, which we don't expose currently. Fixes: 9d56c22a7861 ("ARM: bcm2835: Add devicetree for the Raspberry Pi 3.") Thanks-to: Eric Anholt [for clarifying we can't control the LED] Signed-off-by: Andrea Merello Signed-off-by: Eric Anholt Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts index 7841b724e340..4617759670da 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts +++ b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts @@ -15,13 +15,6 @@ act { gpios = <&gpio 47 0>; }; - - pwr { - label = "PWR"; - gpios = <&gpio 35 0>; - default-state = "keep"; - linux,default-trigger = "default-on"; - }; }; }; -- GitLab From 596ff0afbe8e2e61fdb5cf222b8d72545af77e4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Mon, 24 Oct 2016 17:09:50 +0200 Subject: [PATCH 0458/1442] ARM64: dts: bcm2835: Fix bcm2837 compatible string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4f24450c6e580ac8591942c8bf65355a06b44635 upstream. bcm2837-rpi-3-b.dts, its only in-tree user, was overriding it as "brcm,bcm2837" already. Fixes: 9d56c22a7861 ("ARM: bcm2835: Add devicetree for the Raspberry Pi 3.") Cc: Stephen Warren Signed-off-by: Andreas Färber Signed-off-by: Eric Anholt Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/broadcom/bcm2837.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi index 8216bbb29fe0..c1f719b7097a 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi @@ -1,7 +1,7 @@ #include "bcm283x.dtsi" / { - compatible = "brcm,bcm2836"; + compatible = "brcm,bcm2837"; soc { ranges = <0x7e000000 0x3f000000 0x1000000>, -- GitLab From 78e2d9405e2ddb2c5c5c1f5dc9ead81191277c70 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 26 Sep 2016 09:21:51 +0200 Subject: [PATCH 0459/1442] ARM: pxa: fix pxa25x interrupt init commit e413bd33ac44b6d0bebc0ef2ac19cbe7558a7303 upstream. In the device-tree case, the root interrupt controller cannot be accessed through the 6th coprocessor, contrary to pxa27x and pxa3xx architectures. Fix it to behave as in non-devicetree builds. Fixes: 32f17997c130 ("ARM: pxa: remove irq init from dt machines") Signed-off-by: Robert Jarzmik Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/pxa25x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index 12b94357fbc1..c725baf119e1 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -156,7 +156,7 @@ static int __init __init pxa25x_dt_init_irq(struct device_node *node, struct device_node *parent) { pxa_dt_irq_init(pxa25x_set_wake); - set_handle_irq(ichp_handle_irq); + set_handle_irq(icip_handle_irq); return 0; } -- GitLab From e925eb34265951eca92cbdba31bffc46c80da9e6 Mon Sep 17 00:00:00 2001 From: Kyle Roeschley Date: Mon, 31 Oct 2016 11:26:17 -0500 Subject: [PATCH 0460/1442] ARM: zynq: Reserve correct amount of non-DMA RAM commit 7a3cc2a7b2c723aa552028f4e66841cec183756d upstream. On Zynq, we haven't been reserving the correct amount of DMA-incapable RAM to keep DMA away from it (per the Zynq TRM Section 4.1, it should be the first 512k). In older kernels, this was masked by the memblock_reserve call in arm_memblock_init(). Now, reserve the correct amount excplicitly rather than relying on swapper_pg_dir, which is an address and not a size anyway. Fixes: 46f5b96 ("ARM: zynq: Reserve not DMAable space in front of the kernel") Signed-off-by: Kyle Roeschley Tested-by: Nathan Rossi Signed-off-by: Michal Simek Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-zynq/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index d12002cd63bc..ed118648313f 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -59,7 +59,7 @@ void __iomem *zynq_scu_base; static void __init zynq_memory_init(void) { if (!__pa(PAGE_OFFSET)) - memblock_reserve(__pa(PAGE_OFFSET), __pa(swapper_pg_dir)); + memblock_reserve(__pa(PAGE_OFFSET), 0x80000); } static struct platform_device zynq_cpuidle_device = { -- GitLab From c2ce1c4133b31d60aaf0a075fe109e905ecb237b Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 7 Sep 2016 10:18:42 +0200 Subject: [PATCH 0461/1442] ARM: qcom_defconfig: Fix MDM9515 LCC and GCC config commit 206787737e308bb447d18adef7da7749188212f5 upstream. Correct prefix is MDM instead of MSM. Fixes: 8aa788d3e59a ("ARM: configs: qualcomm: Add MDM9615 missing defconfigs") Signed-off-by: Neil Armstrong Reviewed-by: Stephen Boyd Signed-off-by: Andy Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm/configs/qcom_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index c2dff4fd5fc4..9f6d2a69a6f7 100644 --- a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -162,8 +162,8 @@ CONFIG_APQ_MMCC_8084=y CONFIG_IPQ_LCC_806X=y CONFIG_MSM_GCC_8660=y CONFIG_MSM_LCC_8960=y -CONFIG_MSM_GCC_9615=y -CONFIG_MSM_LCC_9615=y +CONFIG_MDM_GCC_9615=y +CONFIG_MDM_LCC_9615=y CONFIG_MSM_MMCC_8960=y CONFIG_MSM_MMCC_8974=y CONFIG_HWSPINLOCK_QCOM=y -- GitLab From bd99e7a6036e847476f130bfadc67d772debe636 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Nov 2016 11:04:26 -0500 Subject: [PATCH 0462/1442] svcrdma: Clear xpt_bc_xps in xprt_setup_rdma_bc() error exit arm commit 1b9f700b8cfc31089e2dfa5d0905c52fd4529b50 upstream. Logic copied from xs_setup_bc_tcp(). Fixes: 39a9beab5acb ('rpc: share one xps between all backchannels') Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 20027f8de129..6035c5a380a6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -359,6 +359,7 @@ xprt_setup_rdma_bc(struct xprt_create *args) out_fail: xprt_rdma_free_addresses(xprt); args->bc_xprt->xpt_bc_xprt = NULL; + args->bc_xprt->xpt_bc_xps = NULL; xprt_put(xprt); xprt_free(xprt); return ERR_PTR(-EINVAL); -- GitLab From aa1c7b01c9c7f263a0eb91a543fb91078ab5a508 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Dec 2016 11:14:06 +0100 Subject: [PATCH 0463/1442] bus: arm-ccn: Prevent hotplug callback leak commit 26242b330093fd14c2e94fb6cbdf0f482ab26576 upstream. In case the driver registration fails, the hotplug callback is leaked. Not fatal, because it's never invoked as there are no instances registered, but wrong nevertheless. Fixes: fdc15a36d84e ("bus/arm-ccn: Convert to hotplug statemachine") Signed-off-by: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Mark Rutland Cc: Pawel Moll Cc: Suzuki K Poulose Cc: Peter Zijlstra Cc: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/bus/arm-ccn.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index d1074d9b38ba..aee83462b796 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1570,7 +1570,10 @@ static int __init arm_ccn_init(void) for (i = 0; i < ARRAY_SIZE(arm_ccn_pmu_events); i++) arm_ccn_pmu_events_attrs[i] = &arm_ccn_pmu_events[i].attr.attr; - return platform_driver_register(&arm_ccn_driver); + ret = platform_driver_register(&arm_ccn_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); + return ret; } static void __exit arm_ccn_exit(void) -- GitLab From 0f665deba9bc67a2af52e2e956a13e59ba1600f8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Nov 2016 16:50:11 -0700 Subject: [PATCH 0464/1442] ARM: OMAP5: Fix mpuss_early_init commit 8a8be46afeaa47aed1debe7e9b18152f9826a6b7 upstream. We need to properly initialize mpuss also on omap5 like we do on omap4. Otherwise we run into similar kexec problems like we had on omap4 when trying to kexec from a kernel with PM initialized. Fixes: 0573b957fc21 ("ARM: OMAP4+: Prevent CPU1 related hang with kexec") Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/common.h | 38 +++++++++++++++++------ arch/arm/mach-omap2/io.c | 3 +- arch/arm/mach-omap2/omap-mpuss-lowpower.c | 32 ++++++++++++++----- arch/arm/mach-omap2/omap4-sar-layout.h | 2 ++ 4 files changed, 58 insertions(+), 17 deletions(-) diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index deed42e1dd9c..6dcca2957e23 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -262,8 +262,6 @@ extern void __iomem *omap4_get_sar_ram_base(void); extern void omap4_mpuss_early_init(void); extern void omap_do_wfi(void); -extern void omap4_secondary_startup(void); -extern void omap4460_secondary_startup(void); #ifdef CONFIG_SMP /* Needed for secondary core boot */ @@ -275,16 +273,11 @@ extern void omap4_cpu_die(unsigned int cpu); extern int omap4_cpu_kill(unsigned int cpu); extern const struct smp_operations omap4_smp_ops; - -extern void omap5_secondary_startup(void); -extern void omap5_secondary_hyp_startup(void); #endif #if defined(CONFIG_SMP) && defined(CONFIG_PM) extern int omap4_mpuss_init(void); extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state); -extern int omap4_finish_suspend(unsigned long cpu_state); -extern void omap4_cpu_resume(void); extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state); #else static inline int omap4_enter_lowpower(unsigned int cpu, @@ -305,14 +298,41 @@ static inline int omap4_mpuss_init(void) return 0; } +#endif + +#ifdef CONFIG_ARCH_OMAP4 +void omap4_secondary_startup(void); +void omap4460_secondary_startup(void); +int omap4_finish_suspend(unsigned long cpu_state); +void omap4_cpu_resume(void); +#else +static inline void omap4_secondary_startup(void) +{ +} + +static inline void omap4460_secondary_startup(void) +{ +} static inline int omap4_finish_suspend(unsigned long cpu_state) { return 0; } - static inline void omap4_cpu_resume(void) -{} +{ +} +#endif +#if defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) +void omap5_secondary_startup(void); +void omap5_secondary_hyp_startup(void); +#else +static inline void omap5_secondary_startup(void) +{ +} + +static inline void omap5_secondary_hyp_startup(void) +{ +} #endif void pdata_quirks_init(const struct of_device_id *); diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 0e9acdd95d70..f0da5259762a 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -717,10 +717,11 @@ void __init omap5_init_early(void) OMAP2_L4_IO_ADDRESS(OMAP54XX_SCM_BASE)); omap2_set_globals_prcm_mpu(OMAP2_L4_IO_ADDRESS(OMAP54XX_PRCM_MPU_BASE)); omap2_control_base_init(); - omap4_pm_init_early(); omap2_prcm_base_init(); omap5xxx_check_revision(); omap4_sar_ram_init(); + omap4_mpuss_early_init(); + omap4_pm_init_early(); omap54xx_voltagedomains_init(); omap54xx_powerdomains_init(); omap54xx_clockdomains_init(); diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index ad982465efd0..94428b476d8f 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "soc.h" @@ -371,8 +372,12 @@ int __init omap4_mpuss_init(void) pm_info = &per_cpu(omap4_pm_info, 0x0); if (sar_base) { pm_info->scu_sar_addr = sar_base + SCU_OFFSET0; - pm_info->wkup_sar_addr = sar_base + - CPU0_WAKEUP_NS_PA_ADDR_OFFSET; + if (cpu_is_omap44xx()) + pm_info->wkup_sar_addr = sar_base + + CPU0_WAKEUP_NS_PA_ADDR_OFFSET; + else + pm_info->wkup_sar_addr = sar_base + + OMAP5_CPU0_WAKEUP_NS_PA_ADDR_OFFSET; pm_info->l2x0_sar_addr = sar_base + L2X0_SAVE_OFFSET0; } pm_info->pwrdm = pwrdm_lookup("cpu0_pwrdm"); @@ -391,8 +396,12 @@ int __init omap4_mpuss_init(void) pm_info = &per_cpu(omap4_pm_info, 0x1); if (sar_base) { pm_info->scu_sar_addr = sar_base + SCU_OFFSET1; - pm_info->wkup_sar_addr = sar_base + - CPU1_WAKEUP_NS_PA_ADDR_OFFSET; + if (cpu_is_omap44xx()) + pm_info->wkup_sar_addr = sar_base + + CPU1_WAKEUP_NS_PA_ADDR_OFFSET; + else + pm_info->wkup_sar_addr = sar_base + + OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET; pm_info->l2x0_sar_addr = sar_base + L2X0_SAVE_OFFSET1; } @@ -453,15 +462,24 @@ void __init omap4_mpuss_early_init(void) { unsigned long startup_pa; - if (!cpu_is_omap44xx()) + if (!(cpu_is_omap44xx() || soc_is_omap54xx())) return; sar_base = omap4_get_sar_ram_base(); if (cpu_is_omap443x()) startup_pa = virt_to_phys(omap4_secondary_startup); - else + else if (cpu_is_omap446x()) startup_pa = virt_to_phys(omap4460_secondary_startup); + else if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE) + startup_pa = virt_to_phys(omap5_secondary_hyp_startup); + else + startup_pa = virt_to_phys(omap5_secondary_startup); - writel_relaxed(startup_pa, sar_base + CPU1_WAKEUP_NS_PA_ADDR_OFFSET); + if (cpu_is_omap44xx()) + writel_relaxed(startup_pa, sar_base + + CPU1_WAKEUP_NS_PA_ADDR_OFFSET); + else + writel_relaxed(startup_pa, sar_base + + OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET); } diff --git a/arch/arm/mach-omap2/omap4-sar-layout.h b/arch/arm/mach-omap2/omap4-sar-layout.h index 792b1069f724..5b2966a0f733 100644 --- a/arch/arm/mach-omap2/omap4-sar-layout.h +++ b/arch/arm/mach-omap2/omap4-sar-layout.h @@ -31,6 +31,8 @@ /* CPUx Wakeup Non-Secure Physical Address offsets in SAR_BANK3 */ #define CPU0_WAKEUP_NS_PA_ADDR_OFFSET 0xa04 #define CPU1_WAKEUP_NS_PA_ADDR_OFFSET 0xa08 +#define OMAP5_CPU0_WAKEUP_NS_PA_ADDR_OFFSET 0xe00 +#define OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET 0xe04 #define SAR_BACKUP_STATUS_OFFSET (SAR_BANK3_OFFSET + 0x500) #define SAR_SECURE_RAM_SIZE_OFFSET (SAR_BANK3_OFFSET + 0x504) -- GitLab From b336dc57bc927e0fb3e7d2591d70b9ec7e426e18 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Nov 2016 16:50:10 -0700 Subject: [PATCH 0465/1442] ARM: OMAP5: Fix build for PM code commit da6d5993bf951846956903bee4f0eafd918250db upstream. It's CONFIG_SOC_OMAP5, not CONFIG_ARCH_OMAP5. Looks like make randconfig builds have not hit this one yet. Fixes: b3bf289c1c45 ("ARM: OMAP2+: Fix build with CONFIG_SMP and CONFIG_PM is not set") Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 5b37ec29996e..e37ceb81a379 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -80,7 +80,7 @@ endif # Power Management omap-4-5-pm-common = omap-mpuss-lowpower.o obj-$(CONFIG_ARCH_OMAP4) += $(omap-4-5-pm-common) -obj-$(CONFIG_ARCH_OMAP5) += $(omap-4-5-pm-common) +obj-$(CONFIG_SOC_OMAP5) += $(omap-4-5-pm-common) obj-$(CONFIG_OMAP_PM_NOOP) += omap-pm-noop.o ifeq ($(CONFIG_PM),y) -- GitLab From 87dbf3dc165240f1a3bed1ac7243a6b73c474029 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Nov 2016 16:50:11 -0700 Subject: [PATCH 0466/1442] ARM: OMAP4+: Fix bad fallthrough for cpuidle commit cbf2642872333547b56b8c4d943f5ed04ac9a4ee upstream. We don't want to fall through to a bunch of errors for retention if PM_OMAP4_CPU_OSWR_DISABLE is not configured for a SoC. Fixes: 6099dd37c669 ("ARM: OMAP5 / DRA7: Enable CPU RET on suspend") Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/omap-mpuss-lowpower.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index 94428b476d8f..7d62ad48c7c9 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -245,10 +245,9 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) save_state = 1; break; case PWRDM_POWER_RET: - if (IS_PM44XX_ERRATUM(PM_OMAP4_CPU_OSWR_DISABLE)) { + if (IS_PM44XX_ERRATUM(PM_OMAP4_CPU_OSWR_DISABLE)) save_state = 0; - break; - } + break; default: /* * CPUx CSWR is invalid hardware state. Also CPUx OSWR -- GitLab From b8ba5faa7a6bdf1d8a86c1fc77651c566445402d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 22 Nov 2016 12:33:11 +0100 Subject: [PATCH 0467/1442] ARM: 8631/1: clkdev: Detect errors in clk_hw_register_clkdev() for mass registration commit 9388093db44356af911adf3d355b7544a13a63cd upstream. Unlike clk_register_clkdev(), clk_hw_register_clkdev() doesn't check for passed error objects from a previous registration call. Hence the caller of clk_hw_register_*() has to check for errors before calling clk_hw_register_clkdev*(). Make clk_hw_register_clkdev() more similar to clk_register_clkdev() by adding this error check, removing the burden from callers that do mass registration. Fixes: e4f1b49bda6d6aa2 ("clkdev: Add clk_hw based registration APIs") Fixes: 944b9a41e004534f ("clk: ls1x: Migrate to clk_hw based OF and registration APIs") Fixes: 44ce9a9ae977736f ("MIPS: TXx9: Convert to Common Clock Framework") Fixes: f48d947a162dfa9d ("clk: clps711x: Migrate to clk_hw based OF and registration APIs") Fixes: b4626a7f489238a5 ("CLK: Add Loongson1C clock support") Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clkdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 97ae60fa1584..bb8a77a5985f 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -448,12 +448,20 @@ EXPORT_SYMBOL(clk_register_clkdev); * * con_id or dev_id may be NULL as a wildcard, just as in the rest of * clkdev. + * + * To make things easier for mass registration, we detect error clk_hws + * from a previous clk_hw_register_*() call, and return the error code for + * those. This is to permit this function to be called immediately + * after clk_hw_register_*(). */ int clk_hw_register_clkdev(struct clk_hw *hw, const char *con_id, const char *dev_id) { struct clk_lookup *cl; + if (IS_ERR(hw)) + return PTR_ERR(hw); + /* * Since dev_id can be NULL, and NULL is handled specially, we must * pass it as either a NULL format string, or with "%s". -- GitLab From 05b7bdf1c3d8fe9c1bd92887a863a02c4cdcd207 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 5 Dec 2016 09:27:44 +0530 Subject: [PATCH 0468/1442] ARM: omap2+: am437x: rollback to use omap3_gptimer_timer_init() commit f86a2c875fd146d9b82c8fdd86d31084507bcf4c upstream. The commit 55ee7017ee31 ("arm: omap2: board-generic: use omap4_local_timer_init for AM437x") unintentionally changes the clocksource devices for AM437x from OMAP GP Timer to SyncTimer32K. Unfortunately, the SyncTimer32K is starving from frequency deviation as mentioned in commit 5b5c01359152 ("ARM: OMAP2+: AM43x: Use gptimer as clocksource") and, as reported by Franklin [1], even its monotonic nature is under question (most probably there is a HW issue, but it's still under investigation). Taking into account above facts It's reasonable to rollback to the use of omap3_gptimer_timer_init(). [1] http://www.spinics.net/lists/linux-omap/msg127425.html Fixes: 55ee7017ee31 ("arm: omap2: board-generic: use omap4_local_timer_init for AM437x") Reported-by: Cooper Jr., Franklin Signed-off-by: Grygorii Strashko Signed-off-by: Lokesh Vutla Signed-off-by: Keerthy Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/board-generic.c | 2 +- arch/arm/mach-omap2/timer.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index bab814d2f37d..60f5e838a3bc 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -306,7 +306,7 @@ DT_MACHINE_START(AM43_DT, "Generic AM43 (Flattened Device Tree)") .init_late = am43xx_init_late, .init_irq = omap_gic_of_init, .init_machine = omap_generic_init, - .init_time = omap4_local_timer_init, + .init_time = omap3_gptimer_timer_init, .dt_compat = am43_boards_compat, .restart = omap44xx_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 5e2e2218a402..b2f2448bfa6d 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -510,18 +510,19 @@ void __init omap3_secure_sync32k_timer_init(void) } #endif /* CONFIG_ARCH_OMAP3 */ -#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) +#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) || \ + defined(CONFIG_SOC_AM43XX) void __init omap3_gptimer_timer_init(void) { __omap_sync32k_timer_init(2, "timer_sys_ck", NULL, 1, "timer_sys_ck", "ti,timer-alwon", true); - - clocksource_probe(); + if (of_have_populated_dt()) + clocksource_probe(); } #endif #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \ - defined(CONFIG_SOC_DRA7XX) || defined(CONFIG_SOC_AM43XX) + defined(CONFIG_SOC_DRA7XX) static void __init omap4_sync32k_timer_init(void) { __omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon", -- GitLab From f52e670a5b29dd4fc94f0e386ee85724ec052f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 8 Dec 2016 17:37:08 +0100 Subject: [PATCH 0469/1442] spi: mvebu: fix baudrate calculation for armada variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7243e0b20729d372e97763617a7a9c89f29b33e1 upstream. The calculation of SPR and SPPR doesn't round correctly at several places which might result in baud rates that are too big. For example with tclk_hz = 250000001 and target rate 25000000 it determined a divider of 10 which is wrong. Instead of fixing all the corner cases replace the calculation by an algorithm without a loop which should even be quicker to execute apart from being correct. Fixes: df59fa7f4bca ("spi: orion: support armada extended baud rates") Signed-off-by: Uwe Kleine-König Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-orion.c | 83 +++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index ded37025b445..6b001c4a5640 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -138,37 +138,62 @@ static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed) tclk_hz = clk_get_rate(orion_spi->clk); if (devdata->typ == ARMADA_SPI) { - unsigned int clk, spr, sppr, sppr2, err; - unsigned int best_spr, best_sppr, best_err; - - best_err = speed; - best_spr = 0; - best_sppr = 0; - - /* Iterate over the valid range looking for best fit */ - for (sppr = 0; sppr < 8; sppr++) { - sppr2 = 0x1 << sppr; - - spr = tclk_hz / sppr2; - spr = DIV_ROUND_UP(spr, speed); - if ((spr == 0) || (spr > 15)) - continue; - - clk = tclk_hz / (spr * sppr2); - err = speed - clk; - - if (err < best_err) { - best_spr = spr; - best_sppr = sppr; - best_err = err; - } - } + /* + * Given the core_clk (tclk_hz) and the target rate (speed) we + * determine the best values for SPR (in [0 .. 15]) and SPPR (in + * [0..7]) such that + * + * core_clk / (SPR * 2 ** SPPR) + * + * is as big as possible but not bigger than speed. + */ - if ((best_sppr == 0) && (best_spr == 0)) - return -EINVAL; + /* best integer divider: */ + unsigned divider = DIV_ROUND_UP(tclk_hz, speed); + unsigned spr, sppr; + + if (divider < 16) { + /* This is the easy case, divider is less than 16 */ + spr = divider; + sppr = 0; + + } else { + unsigned two_pow_sppr; + /* + * Find the highest bit set in divider. This and the + * three next bits define SPR (apart from rounding). + * SPPR is then the number of zero bits that must be + * appended: + */ + sppr = fls(divider) - 4; + + /* + * As SPR only has 4 bits, we have to round divider up + * to the next multiple of 2 ** sppr. + */ + two_pow_sppr = 1 << sppr; + divider = (divider + two_pow_sppr - 1) & -two_pow_sppr; + + /* + * recalculate sppr as rounding up divider might have + * increased it enough to change the position of the + * highest set bit. In this case the bit that now + * doesn't make it into SPR is 0, so there is no need to + * round again. + */ + sppr = fls(divider) - 4; + spr = divider >> sppr; + + /* + * Now do range checking. SPR is constructed to have a + * width of 4 bits, so this is fine for sure. So we + * still need to check for sppr to fit into 3 bits: + */ + if (sppr > 7) + return -EINVAL; + } - prescale = ((best_sppr & 0x6) << 5) | - ((best_sppr & 0x1) << 4) | best_spr; + prescale = ((sppr & 0x6) << 5) | ((sppr & 0x1) << 4) | spr; } else { /* * the supported rates are: 4,6,8...30 -- GitLab From 93f2976eb0276c932b22da771c90b9b4cd52ee49 Mon Sep 17 00:00:00 2001 From: Dennis Kadioglu Date: Mon, 9 Jan 2017 17:10:46 +0100 Subject: [PATCH 0470/1442] ALSA: usb-audio: Add a quirk for Plantronics BT600 commit 2e40795c3bf344cfb5220d94566205796e3ef19a upstream. Plantronics BT600 does not support reading the sample rate which leads to many lines of "cannot get freq at ep 0x1" and "cannot get freq at ep 0x82". This patch adds the USB ID of the BT600 to quirks.c and avoids those error messages. Signed-off-by: Dennis Kadioglu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 2782155ae3ce..93bb14e7e0f7 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1135,6 +1135,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ + case USB_ID(0x047F, 0x02F7): /* Plantronics BT-600 */ case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ -- GitLab From 3f41ee3a45cb3b2a458e7c4aa69c0638fd745ad2 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Dec 2016 18:27:37 +0200 Subject: [PATCH 0471/1442] drm/i915/gen9: Fix PCODE polling during CDCLK change notification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2c7d0602c815277f7cb7c932b091288710d8aba7 upstream. commit 848496e5902833600f7992f4faa82dc1546051ba Author: Ville Syrjälä Date: Wed Jul 13 16:32:03 2016 +0300 drm/i915: Wait up to 3ms for the pcu to ack the cdclk change request on SKL increased the timeout to match the spec, but we still see a timeout on at least one SKL. A CDCLK change request following the failed one will succeed nevertheless. I could reproduce this problem easily by running kms_pipe_crc_basic in a loop. In all failure cases _wait_for() was pre-empted for >3ms and so in the worst case - when the pre-emption happened right after calculating timeout__ in _wait_for() - we called skl_cdclk_wait_for_pcu_ready() only once which failed and so _wait_for() timed out. As opposed to this the spec says to keep retrying the request for at most a 3ms period. To fix this send the first request explicitly to guarantee that there is 3ms between the first and last request. Though this matches the spec, I noticed that in rare cases this can still time out if we sent only a few requests (in the worst case 2) _and_ PCODE is busy for some reason even after a previous request and a 3ms delay. To work around this retry the polling with pre-emption disabled to maximize the number of requests. Also increase the timeout to 10ms to account for interrupts that could reduce the number of requests. With this change I couldn't trigger the problem. v2: - Use 1ms poll period instead of 10us. (Chris) v3: - Poll with pre-emption disabled to increase the number of request attempts. (Ville, Chris) - Factor out a helper to poll, it's also needed by the next patch. v4: - Pass reply_mask, reply to skl_pcode_request(), instead of assuming the reply is generic. (Ville) v5: - List the request specific timeout values as code comment. (Ville) v6: - Try the poll first with preemption enabled. - Add code comment about first request being queued by PCODE. (Art) - Add timeout_base_ms argument. (Ville) v7: - Clarify code comment about first queued request. (Chris) Cc: Ville Syrjälä Cc: Chris Wilson Cc: Art Runyan Cc: # v4.2- : 3b2c171 : drm/i915: Wait up to 3ms Cc: # v4.2- Fixes: 5d96d8afcfbb ("drm/i915/skl: Deinit/init the display at suspend/resume") Reference: https://bugs.freedesktop.org/show_bug.cgi?id=97929 Testcase: igt/kms_pipe_crc_basic/suspend-read-crc-pipe-B Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1480955258-26311-1-git-send-email-imre.deak@intel.com (cherry picked from commit a0b8a1fe34430c3a82258e8cb45f5968bdf31afd) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/intel_display.c | 31 ++++-------- drivers/gpu/drm/i915/intel_pm.c | 75 ++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 685e9e065287..da832d3cdca7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3684,6 +3684,8 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val); +int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms); /* intel_sideband.c */ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 71441c329603..c9e83f39ec0a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6262,36 +6262,25 @@ skl_dpll0_disable(struct drm_i915_private *dev_priv) dev_priv->cdclk_pll.vco = 0; } -static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv) -{ - int ret; - u32 val; - - /* inform PCU we want to change CDCLK */ - val = SKL_CDCLK_PREPARE_FOR_CHANGE; - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_read(dev_priv, SKL_PCODE_CDCLK_CONTROL, &val); - mutex_unlock(&dev_priv->rps.hw_lock); - - return ret == 0 && (val & SKL_CDCLK_READY_FOR_CHANGE); -} - -static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv) -{ - return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0; -} - static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) { struct drm_device *dev = &dev_priv->drm; u32 freq_select, pcu_ack; + int ret; WARN_ON((cdclk == 24000) != (vco == 0)); DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - if (!skl_cdclk_wait_for_pcu_ready(dev_priv)) { - DRM_ERROR("failed to inform PCU about cdclk change\n"); + mutex_lock(&dev_priv->rps.hw_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); return; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 103cefdb9ddd..985cb31f4b44 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7955,6 +7955,81 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, return 0; } +static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + u32 *status) +{ + u32 val = request; + + *status = sandybridge_pcode_read(dev_priv, mbox, &val); + + return *status || ((val & reply_mask) == reply); +} + +/** + * skl_pcode_request - send PCODE request until acknowledgment + * @dev_priv: device private + * @mbox: PCODE mailbox ID the request is targeted for + * @request: request ID + * @reply_mask: mask used to check for request acknowledgment + * @reply: value used to check for request acknowledgment + * @timeout_base_ms: timeout for polling with preemption enabled + * + * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE + * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * The request is acknowledged once the PCODE reply dword equals @reply after + * applying @reply_mask. Polling is first attempted with preemption enabled + * for @timeout_base_ms and if this times out for another 10 ms with + * preemption disabled. + * + * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some + * other error as reported by PCODE. + */ +int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + u32 status; + int ret; + + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + +#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ + &status) + + /* + * Prime the PCODE by doing a request first. Normally it guarantees + * that a subsequent request, at most @timeout_base_ms later, succeeds. + * _wait_for() doesn't guarantee when its passed condition is evaluated + * first, so send the first request explicitly. + */ + if (COND) { + ret = 0; + goto out; + } + ret = _wait_for(COND, timeout_base_ms * 1000, 10); + if (!ret) + goto out; + + /* + * The above can time out if the number of requests was low (2 in the + * worst case) _and_ PCODE was busy for some reason even after a + * (queued) request and @timeout_base_ms delay. As a workaround retry + * the poll with preemption disabled to maximize the number of + * requests. Increase the timeout from @timeout_base_ms to 50ms to + * account for interrupts that could reduce the number of these + * requests. + */ + DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); + WARN_ON_ONCE(timeout_base_ms > 3); + preempt_disable(); + ret = wait_for_atomic(COND, 50); + preempt_enable(); + +out: + return ret ? ret : status; +#undef COND +} + static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) { /* -- GitLab From 7dae85b5c355a55c3ef44ef4dad0a77125802c7f Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 26 Nov 2016 14:43:35 -0600 Subject: [PATCH 0472/1442] rtlwifi: Fix enter/exit power_save commit ba9f93f82abafe2552eac942ebb11c2df4f8dd7f upstream. In commit a5ffbe0a1993 ("rtlwifi: Fix scheduling while atomic bug") and commit a269913c52ad ("rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue"), an error was introduced in the power-save routines due to the fact that leaving PS was delayed by the use of a work queue. This problem is fixed by detecting if the enter or leave routines are in interrupt mode. If so, the workqueue is used to place the request. If in normal mode, the enter or leave routines are called directly. Fixes: a269913c52ad ("rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue") Reported-by: Ping-Ke Shih Signed-off-by: Larry Finger Cc: Stable Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/base.c | 8 ++--- drivers/net/wireless/realtek/rtlwifi/core.c | 9 ++---- drivers/net/wireless/realtek/rtlwifi/pci.c | 14 +++----- drivers/net/wireless/realtek/rtlwifi/ps.c | 36 ++++++++++++++++----- 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 264466f59c57..4ac928bf1f8e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1303,12 +1303,13 @@ EXPORT_SYMBOL_GPL(rtl_action_proc); static void setup_arp_tx(struct rtl_priv *rtlpriv, struct rtl_ps_ctl *ppsc) { + struct ieee80211_hw *hw = rtlpriv->hw; + rtlpriv->ra.is_special_data = true; if (rtlpriv->cfg->ops->get_btc_status()) rtlpriv->btcoexist.btc_ops->btc_special_packet_notify( rtlpriv, 1); - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); ppsc->last_delaylps_stamp_jiffies = jiffies; } @@ -1381,8 +1382,7 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx, if (is_tx) { rtlpriv->ra.is_special_data = true; - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); ppsc->last_delaylps_stamp_jiffies = jiffies; } diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 8e7f23c11680..4da4e458142c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1150,10 +1150,8 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, } else { mstatus = RT_MEDIA_DISCONNECT; - if (mac->link_state == MAC80211_LINKED) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + if (mac->link_state == MAC80211_LINKED) + rtl_lps_leave(hw); if (ppsc->p2p_ps_info.p2p_ps_mode > P2P_PS_NONE) rtl_p2p_ps_cmd(hw, P2P_PS_DISABLE); mac->link_state = MAC80211_NOLINK; @@ -1431,8 +1429,7 @@ static void rtl_op_sw_scan_start(struct ieee80211_hw *hw, } if (mac->link_state == MAC80211_LINKED) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); mac->link_state = MAC80211_LINKED_SCANNING; } else { rtl_ips_nic_on(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 0dfa9eac3926..5be4fc96002d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -663,11 +663,9 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) } if (((rtlpriv->link_info.num_rx_inperiod + - rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + rtlpriv->link_info.num_tx_inperiod) > 8) || + (rtlpriv->link_info.num_rx_inperiod > 2)) + rtl_lps_leave(hw); } static int _rtl_pci_init_one_rxdesc(struct ieee80211_hw *hw, @@ -918,10 +916,8 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) } if (((rtlpriv->link_info.num_rx_inperiod + rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + (rtlpriv->link_info.num_rx_inperiod > 2)) + rtl_lps_leave(hw); skb = new_skb; no_new: if (rtlpriv->use_new_trx_flow) { diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 18d979affc18..d0ffc4d508cf 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -407,8 +407,8 @@ void rtl_lps_set_psmode(struct ieee80211_hw *hw, u8 rt_psmode) } } -/*Enter the leisure power save mode.*/ -void rtl_lps_enter(struct ieee80211_hw *hw) +/* Interrupt safe routine to enter the leisure power save mode.*/ +static void rtl_lps_enter_core(struct ieee80211_hw *hw) { struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -444,10 +444,9 @@ void rtl_lps_enter(struct ieee80211_hw *hw) spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } -EXPORT_SYMBOL(rtl_lps_enter); -/*Leave the leisure power save mode.*/ -void rtl_lps_leave(struct ieee80211_hw *hw) +/* Interrupt safe routine to leave the leisure power save mode.*/ +static void rtl_lps_leave_core(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -477,7 +476,6 @@ void rtl_lps_leave(struct ieee80211_hw *hw) } spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } -EXPORT_SYMBOL(rtl_lps_leave); /* For sw LPS*/ void rtl_swlps_beacon(struct ieee80211_hw *hw, void *data, unsigned int len) @@ -670,12 +668,34 @@ void rtl_lps_change_work_callback(struct work_struct *work) struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->enter_ps) - rtl_lps_enter(hw); + rtl_lps_enter_core(hw); else - rtl_lps_leave(hw); + rtl_lps_leave_core(hw); } EXPORT_SYMBOL_GPL(rtl_lps_change_work_callback); +void rtl_lps_enter(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (!in_interrupt()) + return rtl_lps_enter_core(hw); + rtlpriv->enter_ps = true; + schedule_work(&rtlpriv->works.lps_change_work); +} +EXPORT_SYMBOL_GPL(rtl_lps_enter); + +void rtl_lps_leave(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (!in_interrupt()) + return rtl_lps_leave_core(hw); + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); +} +EXPORT_SYMBOL_GPL(rtl_lps_leave); + void rtl_swlps_wq_callback(void *data) { struct rtl_works *rtlworks = container_of_dwork_rtl(data, -- GitLab From 6fea974494af50921a5201ebb30c65dde30a1a2b Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 21 Dec 2016 11:18:55 -0600 Subject: [PATCH 0473/1442] rtlwifi: rtl_usb: Fix missing entry in USB driver's private data commit 60f59ce0278557f7896d5158ae6d12a4855a72cc upstream. These drivers need to be able to reference "struct ieee80211_hw" from the driver's private data, and vice versa. The USB driver failed to store the address of ieee80211_hw in the private data. Although this bug has been present for a long time, it was not exposed until commit ba9f93f82aba ("rtlwifi: Fix enter/exit power_save"). Fixes: ba9f93f82aba ("rtlwifi: Fix enter/exit power_save") Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 32aa5c1d070a..3837bbdecf05 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1067,6 +1067,7 @@ int rtl_usb_probe(struct usb_interface *intf, return -ENOMEM; } rtlpriv = hw->priv; + rtlpriv->hw = hw; rtlpriv->usb_data = kzalloc(RTL_USB_MAX_RX_COUNT * sizeof(u32), GFP_KERNEL); if (!rtlpriv->usb_data) -- GitLab From 75353ac8ff437322ca5520b28d9f9b4b41b39bd6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 15 Jan 2017 13:43:07 +0100 Subject: [PATCH 0474/1442] Linux 4.9.4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ae42a0aaab06..9175706bfe7f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 5975358bf2ad7e3153ac34b56b5a1abe58075d76 Mon Sep 17 00:00:00 2001 From: Pavel Rojtberg Date: Tue, 27 Dec 2016 11:44:51 -0800 Subject: [PATCH 0475/1442] Input: xpad - use correct product id for x360w controllers commit b6fc513da50c5dbc457a8ad6b58b046a6a68fd9d upstream. currently the controllers get the same product id as the wireless receiver. However the controllers actually have their own product id. The patch makes the driver expose the same product id as the windows driver. This improves compatibility when running applications with WINE. see https://github.com/paroj/xpad/issues/54 Signed-off-by: Pavel Rojtberg Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 83af17ad0f1f..bbe15243b8e7 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1376,6 +1376,12 @@ static int xpad_init_input(struct usb_xpad *xpad) input_dev->name = xpad->name; input_dev->phys = xpad->phys; usb_to_input_id(xpad->udev, &input_dev->id); + + if (xpad->xtype == XTYPE_XBOX360W) { + /* x360w controllers and the receiver have different ids */ + input_dev->id.product = 0x02a1; + } + input_dev->dev.parent = &xpad->intf->dev; input_set_drvdata(input_dev, xpad); -- GitLab From f34fbb92fe81ed486d2d84af4ec572b5e9b0f9aa Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Sun, 18 Dec 2016 15:26:12 -0800 Subject: [PATCH 0476/1442] Input: i8042 - add Pegatron touchpad to noloop table commit 41c567a5d7d1a986763e58c3394782813c3bcb03 upstream. Avoid AUX loopback in Pegatron C15B touchpad, so input subsystem is able to recognize a Synaptics touchpad in the AUX port. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=93791 (Touchpad is not detected on DNS 0801480 notebook (PEGATRON C15B)) Suggested-by: Dmitry Torokhov Signed-off-by: Marcos Paulo de Souza Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 073246c7d163..0cdd95801a25 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -211,6 +211,12 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Rev 1"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"), + DMI_MATCH(DMI_PRODUCT_NAME, "C15B"), + }, + }, { } }; -- GitLab From b01bbf221fcc26764c8845c7c6ebd66c230b9157 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Thu, 24 Nov 2016 19:11:51 +0100 Subject: [PATCH 0477/1442] pinctrl: imx: fix imx_pinctrl_desc initialization commit 8f5983ad6b81070376db9487ce81000c85a16027 upstream. Fixes: 6e408ed8be0e ("pinctrl: imx: fix initialization of imx_pinctrl_desc") Reviewed-by: Vladimir Zapolskiy Reviewed-by: Peng Fan Signed-off-by: Gary Bisson Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-imx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 79c4e14a5a75..5ef7e875b50e 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -778,10 +778,10 @@ int imx_pinctrl_probe(struct platform_device *pdev, imx_pinctrl_desc->name = dev_name(&pdev->dev); imx_pinctrl_desc->pins = info->pins; imx_pinctrl_desc->npins = info->npins; - imx_pinctrl_desc->pctlops = &imx_pctrl_ops, - imx_pinctrl_desc->pmxops = &imx_pmx_ops, - imx_pinctrl_desc->confops = &imx_pinconf_ops, - imx_pinctrl_desc->owner = THIS_MODULE, + imx_pinctrl_desc->pctlops = &imx_pctrl_ops; + imx_pinctrl_desc->pmxops = &imx_pmx_ops; + imx_pinctrl_desc->confops = &imx_pinconf_ops; + imx_pinctrl_desc->owner = THIS_MODULE; ret = imx_pinctrl_probe_dt(pdev, info); if (ret) { -- GitLab From 5e1595223a2c703b810c567b3071a6cc87af5890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Sat, 12 Nov 2016 17:04:27 +0100 Subject: [PATCH 0478/1442] pinctrl: sh-pfc: r8a7795: Use lookup function for bias data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d3b861bccdee2fa9963a2b6c64f74a8d752b9315 upstream. There is a bug in the r8a7795 bias code where a WARN() is trigged anytime a pin from PUEN0/PUD0 is accessed. # cat /sys/kernel/debug/pinctrl/e6060000.pfc/pinconf-pins WARNING: CPU: 2 PID: 2391 at drivers/pinctrl/sh-pfc/pfc-r8a7795.c:5364 r8a7795_pinmux_get_bias+0xbc/0xc8 [..] Call trace: [] r8a7795_pinmux_get_bias+0xbc/0xc8 [] sh_pfc_pinconf_get+0x194/0x270 [] pin_config_get_for_pin+0x20/0x30 [] pinconf_generic_dump_one+0x168/0x188 [] pinconf_generic_dump_pins+0x5c/0x98 [] pinconf_pins_show+0xc8/0x128 [] seq_read+0x16c/0x420 [] full_proxy_read+0x58/0x88 [] __vfs_read+0x1c/0xf8 [] vfs_read+0x84/0x148 [] SyS_read+0x44/0xa0 [] __sys_trace_return+0x0/0x4 This is due to the WARN() check if the reg field of the pullups struct is zero, and this should be 0 for pins controlled by the PUEN0/PUD0 registers since PU0 is defined as 0. Change the data structure and use the generic sh_pfc_pin_to_bias_info() function to get the register offset and bit information. Fixes: 560655247b627ac7 ("pinctrl: sh-pfc: r8a7795: Add bias pinconf support") Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sh-pfc/pfc-r8a7795.c | 343 ++++++++++++++------------- 1 file changed, 172 insertions(+), 171 deletions(-) diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c index 2e8cc2adbed7..84cee66b1e08 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c @@ -5188,184 +5188,183 @@ static int r8a7795_pin_to_pocctrl(struct sh_pfc *pfc, unsigned int pin, u32 *poc #define PU5 0x14 #define PU6 0x18 -static const struct { - u16 reg : 11; - u16 bit : 5; -} pullups[] = { - [RCAR_GP_PIN(2, 11)] = { PU0, 31 }, /* AVB_PHY_INT */ - [RCAR_GP_PIN(2, 10)] = { PU0, 30 }, /* AVB_MAGIC */ - [RCAR_GP_PIN(2, 9)] = { PU0, 29 }, /* AVB_MDC */ - - [RCAR_GP_PIN(1, 19)] = { PU1, 31 }, /* A19 */ - [RCAR_GP_PIN(1, 18)] = { PU1, 30 }, /* A18 */ - [RCAR_GP_PIN(1, 17)] = { PU1, 29 }, /* A17 */ - [RCAR_GP_PIN(1, 16)] = { PU1, 28 }, /* A16 */ - [RCAR_GP_PIN(1, 15)] = { PU1, 27 }, /* A15 */ - [RCAR_GP_PIN(1, 14)] = { PU1, 26 }, /* A14 */ - [RCAR_GP_PIN(1, 13)] = { PU1, 25 }, /* A13 */ - [RCAR_GP_PIN(1, 12)] = { PU1, 24 }, /* A12 */ - [RCAR_GP_PIN(1, 11)] = { PU1, 23 }, /* A11 */ - [RCAR_GP_PIN(1, 10)] = { PU1, 22 }, /* A10 */ - [RCAR_GP_PIN(1, 9)] = { PU1, 21 }, /* A9 */ - [RCAR_GP_PIN(1, 8)] = { PU1, 20 }, /* A8 */ - [RCAR_GP_PIN(1, 7)] = { PU1, 19 }, /* A7 */ - [RCAR_GP_PIN(1, 6)] = { PU1, 18 }, /* A6 */ - [RCAR_GP_PIN(1, 5)] = { PU1, 17 }, /* A5 */ - [RCAR_GP_PIN(1, 4)] = { PU1, 16 }, /* A4 */ - [RCAR_GP_PIN(1, 3)] = { PU1, 15 }, /* A3 */ - [RCAR_GP_PIN(1, 2)] = { PU1, 14 }, /* A2 */ - [RCAR_GP_PIN(1, 1)] = { PU1, 13 }, /* A1 */ - [RCAR_GP_PIN(1, 0)] = { PU1, 12 }, /* A0 */ - [RCAR_GP_PIN(2, 8)] = { PU1, 11 }, /* PWM2_A */ - [RCAR_GP_PIN(2, 7)] = { PU1, 10 }, /* PWM1_A */ - [RCAR_GP_PIN(2, 6)] = { PU1, 9 }, /* PWM0 */ - [RCAR_GP_PIN(2, 5)] = { PU1, 8 }, /* IRQ5 */ - [RCAR_GP_PIN(2, 4)] = { PU1, 7 }, /* IRQ4 */ - [RCAR_GP_PIN(2, 3)] = { PU1, 6 }, /* IRQ3 */ - [RCAR_GP_PIN(2, 2)] = { PU1, 5 }, /* IRQ2 */ - [RCAR_GP_PIN(2, 1)] = { PU1, 4 }, /* IRQ1 */ - [RCAR_GP_PIN(2, 0)] = { PU1, 3 }, /* IRQ0 */ - [RCAR_GP_PIN(2, 14)] = { PU1, 2 }, /* AVB_AVTP_CAPTURE_A */ - [RCAR_GP_PIN(2, 13)] = { PU1, 1 }, /* AVB_AVTP_MATCH_A */ - [RCAR_GP_PIN(2, 12)] = { PU1, 0 }, /* AVB_LINK */ - - [RCAR_GP_PIN(7, 3)] = { PU2, 29 }, /* HDMI1_CEC */ - [RCAR_GP_PIN(7, 2)] = { PU2, 28 }, /* HDMI0_CEC */ - [RCAR_GP_PIN(7, 1)] = { PU2, 27 }, /* AVS2 */ - [RCAR_GP_PIN(7, 0)] = { PU2, 26 }, /* AVS1 */ - [RCAR_GP_PIN(0, 15)] = { PU2, 25 }, /* D15 */ - [RCAR_GP_PIN(0, 14)] = { PU2, 24 }, /* D14 */ - [RCAR_GP_PIN(0, 13)] = { PU2, 23 }, /* D13 */ - [RCAR_GP_PIN(0, 12)] = { PU2, 22 }, /* D12 */ - [RCAR_GP_PIN(0, 11)] = { PU2, 21 }, /* D11 */ - [RCAR_GP_PIN(0, 10)] = { PU2, 20 }, /* D10 */ - [RCAR_GP_PIN(0, 9)] = { PU2, 19 }, /* D9 */ - [RCAR_GP_PIN(0, 8)] = { PU2, 18 }, /* D8 */ - [RCAR_GP_PIN(0, 7)] = { PU2, 17 }, /* D7 */ - [RCAR_GP_PIN(0, 6)] = { PU2, 16 }, /* D6 */ - [RCAR_GP_PIN(0, 5)] = { PU2, 15 }, /* D5 */ - [RCAR_GP_PIN(0, 4)] = { PU2, 14 }, /* D4 */ - [RCAR_GP_PIN(0, 3)] = { PU2, 13 }, /* D3 */ - [RCAR_GP_PIN(0, 2)] = { PU2, 12 }, /* D2 */ - [RCAR_GP_PIN(0, 1)] = { PU2, 11 }, /* D1 */ - [RCAR_GP_PIN(0, 0)] = { PU2, 10 }, /* D0 */ - [RCAR_GP_PIN(1, 27)] = { PU2, 8 }, /* EX_WAIT0_A */ - [RCAR_GP_PIN(1, 26)] = { PU2, 7 }, /* WE1_N */ - [RCAR_GP_PIN(1, 25)] = { PU2, 6 }, /* WE0_N */ - [RCAR_GP_PIN(1, 24)] = { PU2, 5 }, /* RD_WR_N */ - [RCAR_GP_PIN(1, 23)] = { PU2, 4 }, /* RD_N */ - [RCAR_GP_PIN(1, 22)] = { PU2, 3 }, /* BS_N */ - [RCAR_GP_PIN(1, 21)] = { PU2, 2 }, /* CS1_N_A26 */ - [RCAR_GP_PIN(1, 20)] = { PU2, 1 }, /* CS0_N */ - - [RCAR_GP_PIN(4, 9)] = { PU3, 31 }, /* SD3_DAT0 */ - [RCAR_GP_PIN(4, 8)] = { PU3, 30 }, /* SD3_CMD */ - [RCAR_GP_PIN(4, 7)] = { PU3, 29 }, /* SD3_CLK */ - [RCAR_GP_PIN(4, 6)] = { PU3, 28 }, /* SD2_DS */ - [RCAR_GP_PIN(4, 5)] = { PU3, 27 }, /* SD2_DAT3 */ - [RCAR_GP_PIN(4, 4)] = { PU3, 26 }, /* SD2_DAT2 */ - [RCAR_GP_PIN(4, 3)] = { PU3, 25 }, /* SD2_DAT1 */ - [RCAR_GP_PIN(4, 2)] = { PU3, 24 }, /* SD2_DAT0 */ - [RCAR_GP_PIN(4, 1)] = { PU3, 23 }, /* SD2_CMD */ - [RCAR_GP_PIN(4, 0)] = { PU3, 22 }, /* SD2_CLK */ - [RCAR_GP_PIN(3, 11)] = { PU3, 21 }, /* SD1_DAT3 */ - [RCAR_GP_PIN(3, 10)] = { PU3, 20 }, /* SD1_DAT2 */ - [RCAR_GP_PIN(3, 9)] = { PU3, 19 }, /* SD1_DAT1 */ - [RCAR_GP_PIN(3, 8)] = { PU3, 18 }, /* SD1_DAT0 */ - [RCAR_GP_PIN(3, 7)] = { PU3, 17 }, /* SD1_CMD */ - [RCAR_GP_PIN(3, 6)] = { PU3, 16 }, /* SD1_CLK */ - [RCAR_GP_PIN(3, 5)] = { PU3, 15 }, /* SD0_DAT3 */ - [RCAR_GP_PIN(3, 4)] = { PU3, 14 }, /* SD0_DAT2 */ - [RCAR_GP_PIN(3, 3)] = { PU3, 13 }, /* SD0_DAT1 */ - [RCAR_GP_PIN(3, 2)] = { PU3, 12 }, /* SD0_DAT0 */ - [RCAR_GP_PIN(3, 1)] = { PU3, 11 }, /* SD0_CMD */ - [RCAR_GP_PIN(3, 0)] = { PU3, 10 }, /* SD0_CLK */ - - [RCAR_GP_PIN(5, 19)] = { PU4, 31 }, /* MSIOF0_SS1 */ - [RCAR_GP_PIN(5, 18)] = { PU4, 30 }, /* MSIOF0_SYNC */ - [RCAR_GP_PIN(5, 17)] = { PU4, 29 }, /* MSIOF0_SCK */ - [RCAR_GP_PIN(5, 16)] = { PU4, 28 }, /* HRTS0_N */ - [RCAR_GP_PIN(5, 15)] = { PU4, 27 }, /* HCTS0_N */ - [RCAR_GP_PIN(5, 14)] = { PU4, 26 }, /* HTX0 */ - [RCAR_GP_PIN(5, 13)] = { PU4, 25 }, /* HRX0 */ - [RCAR_GP_PIN(5, 12)] = { PU4, 24 }, /* HSCK0 */ - [RCAR_GP_PIN(5, 11)] = { PU4, 23 }, /* RX2_A */ - [RCAR_GP_PIN(5, 10)] = { PU4, 22 }, /* TX2_A */ - [RCAR_GP_PIN(5, 9)] = { PU4, 21 }, /* SCK2 */ - [RCAR_GP_PIN(5, 8)] = { PU4, 20 }, /* RTS1_N_TANS */ - [RCAR_GP_PIN(5, 7)] = { PU4, 19 }, /* CTS1_N */ - [RCAR_GP_PIN(5, 6)] = { PU4, 18 }, /* TX1_A */ - [RCAR_GP_PIN(5, 5)] = { PU4, 17 }, /* RX1_A */ - [RCAR_GP_PIN(5, 4)] = { PU4, 16 }, /* RTS0_N_TANS */ - [RCAR_GP_PIN(5, 3)] = { PU4, 15 }, /* CTS0_N */ - [RCAR_GP_PIN(5, 2)] = { PU4, 14 }, /* TX0 */ - [RCAR_GP_PIN(5, 1)] = { PU4, 13 }, /* RX0 */ - [RCAR_GP_PIN(5, 0)] = { PU4, 12 }, /* SCK0 */ - [RCAR_GP_PIN(3, 15)] = { PU4, 11 }, /* SD1_WP */ - [RCAR_GP_PIN(3, 14)] = { PU4, 10 }, /* SD1_CD */ - [RCAR_GP_PIN(3, 13)] = { PU4, 9 }, /* SD0_WP */ - [RCAR_GP_PIN(3, 12)] = { PU4, 8 }, /* SD0_CD */ - [RCAR_GP_PIN(4, 17)] = { PU4, 7 }, /* SD3_DS */ - [RCAR_GP_PIN(4, 16)] = { PU4, 6 }, /* SD3_DAT7 */ - [RCAR_GP_PIN(4, 15)] = { PU4, 5 }, /* SD3_DAT6 */ - [RCAR_GP_PIN(4, 14)] = { PU4, 4 }, /* SD3_DAT5 */ - [RCAR_GP_PIN(4, 13)] = { PU4, 3 }, /* SD3_DAT4 */ - [RCAR_GP_PIN(4, 12)] = { PU4, 2 }, /* SD3_DAT3 */ - [RCAR_GP_PIN(4, 11)] = { PU4, 1 }, /* SD3_DAT2 */ - [RCAR_GP_PIN(4, 10)] = { PU4, 0 }, /* SD3_DAT1 */ - - [RCAR_GP_PIN(6, 24)] = { PU5, 31 }, /* USB0_PWEN */ - [RCAR_GP_PIN(6, 23)] = { PU5, 30 }, /* AUDIO_CLKB_B */ - [RCAR_GP_PIN(6, 22)] = { PU5, 29 }, /* AUDIO_CLKA_A */ - [RCAR_GP_PIN(6, 21)] = { PU5, 28 }, /* SSI_SDATA9_A */ - [RCAR_GP_PIN(6, 20)] = { PU5, 27 }, /* SSI_SDATA8 */ - [RCAR_GP_PIN(6, 19)] = { PU5, 26 }, /* SSI_SDATA7 */ - [RCAR_GP_PIN(6, 18)] = { PU5, 25 }, /* SSI_WS78 */ - [RCAR_GP_PIN(6, 17)] = { PU5, 24 }, /* SSI_SCK78 */ - [RCAR_GP_PIN(6, 16)] = { PU5, 23 }, /* SSI_SDATA6 */ - [RCAR_GP_PIN(6, 15)] = { PU5, 22 }, /* SSI_WS6 */ - [RCAR_GP_PIN(6, 14)] = { PU5, 21 }, /* SSI_SCK6 */ - [RCAR_GP_PIN(6, 13)] = { PU5, 20 }, /* SSI_SDATA5 */ - [RCAR_GP_PIN(6, 12)] = { PU5, 19 }, /* SSI_WS5 */ - [RCAR_GP_PIN(6, 11)] = { PU5, 18 }, /* SSI_SCK5 */ - [RCAR_GP_PIN(6, 10)] = { PU5, 17 }, /* SSI_SDATA4 */ - [RCAR_GP_PIN(6, 9)] = { PU5, 16 }, /* SSI_WS4 */ - [RCAR_GP_PIN(6, 8)] = { PU5, 15 }, /* SSI_SCK4 */ - [RCAR_GP_PIN(6, 7)] = { PU5, 14 }, /* SSI_SDATA3 */ - [RCAR_GP_PIN(6, 6)] = { PU5, 13 }, /* SSI_WS34 */ - [RCAR_GP_PIN(6, 5)] = { PU5, 12 }, /* SSI_SCK34 */ - [RCAR_GP_PIN(6, 4)] = { PU5, 11 }, /* SSI_SDATA2_A */ - [RCAR_GP_PIN(6, 3)] = { PU5, 10 }, /* SSI_SDATA1_A */ - [RCAR_GP_PIN(6, 2)] = { PU5, 9 }, /* SSI_SDATA0 */ - [RCAR_GP_PIN(6, 1)] = { PU5, 8 }, /* SSI_WS01239 */ - [RCAR_GP_PIN(6, 0)] = { PU5, 7 }, /* SSI_SCK01239 */ - [RCAR_GP_PIN(5, 25)] = { PU5, 5 }, /* MLB_DAT */ - [RCAR_GP_PIN(5, 24)] = { PU5, 4 }, /* MLB_SIG */ - [RCAR_GP_PIN(5, 23)] = { PU5, 3 }, /* MLB_CLK */ - [RCAR_GP_PIN(5, 22)] = { PU5, 2 }, /* MSIOF0_RXD */ - [RCAR_GP_PIN(5, 21)] = { PU5, 1 }, /* MSIOF0_SS2 */ - [RCAR_GP_PIN(5, 20)] = { PU5, 0 }, /* MSIOF0_TXD */ - - [RCAR_GP_PIN(6, 31)] = { PU6, 6 }, /* USB31_OVC */ - [RCAR_GP_PIN(6, 30)] = { PU6, 5 }, /* USB31_PWEN */ - [RCAR_GP_PIN(6, 29)] = { PU6, 4 }, /* USB30_OVC */ - [RCAR_GP_PIN(6, 28)] = { PU6, 3 }, /* USB30_PWEN */ - [RCAR_GP_PIN(6, 27)] = { PU6, 2 }, /* USB1_OVC */ - [RCAR_GP_PIN(6, 26)] = { PU6, 1 }, /* USB1_PWEN */ - [RCAR_GP_PIN(6, 25)] = { PU6, 0 }, /* USB0_OVC */ +static const struct sh_pfc_bias_info bias_info[] = { + { RCAR_GP_PIN(2, 11), PU0, 31 }, /* AVB_PHY_INT */ + { RCAR_GP_PIN(2, 10), PU0, 30 }, /* AVB_MAGIC */ + { RCAR_GP_PIN(2, 9), PU0, 29 }, /* AVB_MDC */ + + { RCAR_GP_PIN(1, 19), PU1, 31 }, /* A19 */ + { RCAR_GP_PIN(1, 18), PU1, 30 }, /* A18 */ + { RCAR_GP_PIN(1, 17), PU1, 29 }, /* A17 */ + { RCAR_GP_PIN(1, 16), PU1, 28 }, /* A16 */ + { RCAR_GP_PIN(1, 15), PU1, 27 }, /* A15 */ + { RCAR_GP_PIN(1, 14), PU1, 26 }, /* A14 */ + { RCAR_GP_PIN(1, 13), PU1, 25 }, /* A13 */ + { RCAR_GP_PIN(1, 12), PU1, 24 }, /* A12 */ + { RCAR_GP_PIN(1, 11), PU1, 23 }, /* A11 */ + { RCAR_GP_PIN(1, 10), PU1, 22 }, /* A10 */ + { RCAR_GP_PIN(1, 9), PU1, 21 }, /* A9 */ + { RCAR_GP_PIN(1, 8), PU1, 20 }, /* A8 */ + { RCAR_GP_PIN(1, 7), PU1, 19 }, /* A7 */ + { RCAR_GP_PIN(1, 6), PU1, 18 }, /* A6 */ + { RCAR_GP_PIN(1, 5), PU1, 17 }, /* A5 */ + { RCAR_GP_PIN(1, 4), PU1, 16 }, /* A4 */ + { RCAR_GP_PIN(1, 3), PU1, 15 }, /* A3 */ + { RCAR_GP_PIN(1, 2), PU1, 14 }, /* A2 */ + { RCAR_GP_PIN(1, 1), PU1, 13 }, /* A1 */ + { RCAR_GP_PIN(1, 0), PU1, 12 }, /* A0 */ + { RCAR_GP_PIN(2, 8), PU1, 11 }, /* PWM2_A */ + { RCAR_GP_PIN(2, 7), PU1, 10 }, /* PWM1_A */ + { RCAR_GP_PIN(2, 6), PU1, 9 }, /* PWM0 */ + { RCAR_GP_PIN(2, 5), PU1, 8 }, /* IRQ5 */ + { RCAR_GP_PIN(2, 4), PU1, 7 }, /* IRQ4 */ + { RCAR_GP_PIN(2, 3), PU1, 6 }, /* IRQ3 */ + { RCAR_GP_PIN(2, 2), PU1, 5 }, /* IRQ2 */ + { RCAR_GP_PIN(2, 1), PU1, 4 }, /* IRQ1 */ + { RCAR_GP_PIN(2, 0), PU1, 3 }, /* IRQ0 */ + { RCAR_GP_PIN(2, 14), PU1, 2 }, /* AVB_AVTP_CAPTURE_A */ + { RCAR_GP_PIN(2, 13), PU1, 1 }, /* AVB_AVTP_MATCH_A */ + { RCAR_GP_PIN(2, 12), PU1, 0 }, /* AVB_LINK */ + + { RCAR_GP_PIN(7, 3), PU2, 29 }, /* HDMI1_CEC */ + { RCAR_GP_PIN(7, 2), PU2, 28 }, /* HDMI0_CEC */ + { RCAR_GP_PIN(7, 1), PU2, 27 }, /* AVS2 */ + { RCAR_GP_PIN(7, 0), PU2, 26 }, /* AVS1 */ + { RCAR_GP_PIN(0, 15), PU2, 25 }, /* D15 */ + { RCAR_GP_PIN(0, 14), PU2, 24 }, /* D14 */ + { RCAR_GP_PIN(0, 13), PU2, 23 }, /* D13 */ + { RCAR_GP_PIN(0, 12), PU2, 22 }, /* D12 */ + { RCAR_GP_PIN(0, 11), PU2, 21 }, /* D11 */ + { RCAR_GP_PIN(0, 10), PU2, 20 }, /* D10 */ + { RCAR_GP_PIN(0, 9), PU2, 19 }, /* D9 */ + { RCAR_GP_PIN(0, 8), PU2, 18 }, /* D8 */ + { RCAR_GP_PIN(0, 7), PU2, 17 }, /* D7 */ + { RCAR_GP_PIN(0, 6), PU2, 16 }, /* D6 */ + { RCAR_GP_PIN(0, 5), PU2, 15 }, /* D5 */ + { RCAR_GP_PIN(0, 4), PU2, 14 }, /* D4 */ + { RCAR_GP_PIN(0, 3), PU2, 13 }, /* D3 */ + { RCAR_GP_PIN(0, 2), PU2, 12 }, /* D2 */ + { RCAR_GP_PIN(0, 1), PU2, 11 }, /* D1 */ + { RCAR_GP_PIN(0, 0), PU2, 10 }, /* D0 */ + { RCAR_GP_PIN(1, 27), PU2, 8 }, /* EX_WAIT0_A */ + { RCAR_GP_PIN(1, 26), PU2, 7 }, /* WE1_N */ + { RCAR_GP_PIN(1, 25), PU2, 6 }, /* WE0_N */ + { RCAR_GP_PIN(1, 24), PU2, 5 }, /* RD_WR_N */ + { RCAR_GP_PIN(1, 23), PU2, 4 }, /* RD_N */ + { RCAR_GP_PIN(1, 22), PU2, 3 }, /* BS_N */ + { RCAR_GP_PIN(1, 21), PU2, 2 }, /* CS1_N_A26 */ + { RCAR_GP_PIN(1, 20), PU2, 1 }, /* CS0_N */ + + { RCAR_GP_PIN(4, 9), PU3, 31 }, /* SD3_DAT0 */ + { RCAR_GP_PIN(4, 8), PU3, 30 }, /* SD3_CMD */ + { RCAR_GP_PIN(4, 7), PU3, 29 }, /* SD3_CLK */ + { RCAR_GP_PIN(4, 6), PU3, 28 }, /* SD2_DS */ + { RCAR_GP_PIN(4, 5), PU3, 27 }, /* SD2_DAT3 */ + { RCAR_GP_PIN(4, 4), PU3, 26 }, /* SD2_DAT2 */ + { RCAR_GP_PIN(4, 3), PU3, 25 }, /* SD2_DAT1 */ + { RCAR_GP_PIN(4, 2), PU3, 24 }, /* SD2_DAT0 */ + { RCAR_GP_PIN(4, 1), PU3, 23 }, /* SD2_CMD */ + { RCAR_GP_PIN(4, 0), PU3, 22 }, /* SD2_CLK */ + { RCAR_GP_PIN(3, 11), PU3, 21 }, /* SD1_DAT3 */ + { RCAR_GP_PIN(3, 10), PU3, 20 }, /* SD1_DAT2 */ + { RCAR_GP_PIN(3, 9), PU3, 19 }, /* SD1_DAT1 */ + { RCAR_GP_PIN(3, 8), PU3, 18 }, /* SD1_DAT0 */ + { RCAR_GP_PIN(3, 7), PU3, 17 }, /* SD1_CMD */ + { RCAR_GP_PIN(3, 6), PU3, 16 }, /* SD1_CLK */ + { RCAR_GP_PIN(3, 5), PU3, 15 }, /* SD0_DAT3 */ + { RCAR_GP_PIN(3, 4), PU3, 14 }, /* SD0_DAT2 */ + { RCAR_GP_PIN(3, 3), PU3, 13 }, /* SD0_DAT1 */ + { RCAR_GP_PIN(3, 2), PU3, 12 }, /* SD0_DAT0 */ + { RCAR_GP_PIN(3, 1), PU3, 11 }, /* SD0_CMD */ + { RCAR_GP_PIN(3, 0), PU3, 10 }, /* SD0_CLK */ + + { RCAR_GP_PIN(5, 19), PU4, 31 }, /* MSIOF0_SS1 */ + { RCAR_GP_PIN(5, 18), PU4, 30 }, /* MSIOF0_SYNC */ + { RCAR_GP_PIN(5, 17), PU4, 29 }, /* MSIOF0_SCK */ + { RCAR_GP_PIN(5, 16), PU4, 28 }, /* HRTS0_N */ + { RCAR_GP_PIN(5, 15), PU4, 27 }, /* HCTS0_N */ + { RCAR_GP_PIN(5, 14), PU4, 26 }, /* HTX0 */ + { RCAR_GP_PIN(5, 13), PU4, 25 }, /* HRX0 */ + { RCAR_GP_PIN(5, 12), PU4, 24 }, /* HSCK0 */ + { RCAR_GP_PIN(5, 11), PU4, 23 }, /* RX2_A */ + { RCAR_GP_PIN(5, 10), PU4, 22 }, /* TX2_A */ + { RCAR_GP_PIN(5, 9), PU4, 21 }, /* SCK2 */ + { RCAR_GP_PIN(5, 8), PU4, 20 }, /* RTS1_N_TANS */ + { RCAR_GP_PIN(5, 7), PU4, 19 }, /* CTS1_N */ + { RCAR_GP_PIN(5, 6), PU4, 18 }, /* TX1_A */ + { RCAR_GP_PIN(5, 5), PU4, 17 }, /* RX1_A */ + { RCAR_GP_PIN(5, 4), PU4, 16 }, /* RTS0_N_TANS */ + { RCAR_GP_PIN(5, 3), PU4, 15 }, /* CTS0_N */ + { RCAR_GP_PIN(5, 2), PU4, 14 }, /* TX0 */ + { RCAR_GP_PIN(5, 1), PU4, 13 }, /* RX0 */ + { RCAR_GP_PIN(5, 0), PU4, 12 }, /* SCK0 */ + { RCAR_GP_PIN(3, 15), PU4, 11 }, /* SD1_WP */ + { RCAR_GP_PIN(3, 14), PU4, 10 }, /* SD1_CD */ + { RCAR_GP_PIN(3, 13), PU4, 9 }, /* SD0_WP */ + { RCAR_GP_PIN(3, 12), PU4, 8 }, /* SD0_CD */ + { RCAR_GP_PIN(4, 17), PU4, 7 }, /* SD3_DS */ + { RCAR_GP_PIN(4, 16), PU4, 6 }, /* SD3_DAT7 */ + { RCAR_GP_PIN(4, 15), PU4, 5 }, /* SD3_DAT6 */ + { RCAR_GP_PIN(4, 14), PU4, 4 }, /* SD3_DAT5 */ + { RCAR_GP_PIN(4, 13), PU4, 3 }, /* SD3_DAT4 */ + { RCAR_GP_PIN(4, 12), PU4, 2 }, /* SD3_DAT3 */ + { RCAR_GP_PIN(4, 11), PU4, 1 }, /* SD3_DAT2 */ + { RCAR_GP_PIN(4, 10), PU4, 0 }, /* SD3_DAT1 */ + + { RCAR_GP_PIN(6, 24), PU5, 31 }, /* USB0_PWEN */ + { RCAR_GP_PIN(6, 23), PU5, 30 }, /* AUDIO_CLKB_B */ + { RCAR_GP_PIN(6, 22), PU5, 29 }, /* AUDIO_CLKA_A */ + { RCAR_GP_PIN(6, 21), PU5, 28 }, /* SSI_SDATA9_A */ + { RCAR_GP_PIN(6, 20), PU5, 27 }, /* SSI_SDATA8 */ + { RCAR_GP_PIN(6, 19), PU5, 26 }, /* SSI_SDATA7 */ + { RCAR_GP_PIN(6, 18), PU5, 25 }, /* SSI_WS78 */ + { RCAR_GP_PIN(6, 17), PU5, 24 }, /* SSI_SCK78 */ + { RCAR_GP_PIN(6, 16), PU5, 23 }, /* SSI_SDATA6 */ + { RCAR_GP_PIN(6, 15), PU5, 22 }, /* SSI_WS6 */ + { RCAR_GP_PIN(6, 14), PU5, 21 }, /* SSI_SCK6 */ + { RCAR_GP_PIN(6, 13), PU5, 20 }, /* SSI_SDATA5 */ + { RCAR_GP_PIN(6, 12), PU5, 19 }, /* SSI_WS5 */ + { RCAR_GP_PIN(6, 11), PU5, 18 }, /* SSI_SCK5 */ + { RCAR_GP_PIN(6, 10), PU5, 17 }, /* SSI_SDATA4 */ + { RCAR_GP_PIN(6, 9), PU5, 16 }, /* SSI_WS4 */ + { RCAR_GP_PIN(6, 8), PU5, 15 }, /* SSI_SCK4 */ + { RCAR_GP_PIN(6, 7), PU5, 14 }, /* SSI_SDATA3 */ + { RCAR_GP_PIN(6, 6), PU5, 13 }, /* SSI_WS34 */ + { RCAR_GP_PIN(6, 5), PU5, 12 }, /* SSI_SCK34 */ + { RCAR_GP_PIN(6, 4), PU5, 11 }, /* SSI_SDATA2_A */ + { RCAR_GP_PIN(6, 3), PU5, 10 }, /* SSI_SDATA1_A */ + { RCAR_GP_PIN(6, 2), PU5, 9 }, /* SSI_SDATA0 */ + { RCAR_GP_PIN(6, 1), PU5, 8 }, /* SSI_WS01239 */ + { RCAR_GP_PIN(6, 0), PU5, 7 }, /* SSI_SCK01239 */ + { RCAR_GP_PIN(5, 25), PU5, 5 }, /* MLB_DAT */ + { RCAR_GP_PIN(5, 24), PU5, 4 }, /* MLB_SIG */ + { RCAR_GP_PIN(5, 23), PU5, 3 }, /* MLB_CLK */ + { RCAR_GP_PIN(5, 22), PU5, 2 }, /* MSIOF0_RXD */ + { RCAR_GP_PIN(5, 21), PU5, 1 }, /* MSIOF0_SS2 */ + { RCAR_GP_PIN(5, 20), PU5, 0 }, /* MSIOF0_TXD */ + + { RCAR_GP_PIN(6, 31), PU6, 6 }, /* USB31_OVC */ + { RCAR_GP_PIN(6, 30), PU6, 5 }, /* USB31_PWEN */ + { RCAR_GP_PIN(6, 29), PU6, 4 }, /* USB30_OVC */ + { RCAR_GP_PIN(6, 28), PU6, 3 }, /* USB30_PWEN */ + { RCAR_GP_PIN(6, 27), PU6, 2 }, /* USB1_OVC */ + { RCAR_GP_PIN(6, 26), PU6, 1 }, /* USB1_PWEN */ + { RCAR_GP_PIN(6, 25), PU6, 0 }, /* USB0_OVC */ }; static unsigned int r8a7795_pinmux_get_bias(struct sh_pfc *pfc, unsigned int pin) { + const struct sh_pfc_bias_info *info; u32 reg; u32 bit; - if (WARN_ON_ONCE(!pullups[pin].reg)) + info = sh_pfc_pin_to_bias_info(bias_info, ARRAY_SIZE(bias_info), pin); + if (!info) return PIN_CONFIG_BIAS_DISABLE; - reg = pullups[pin].reg; - bit = BIT(pullups[pin].bit); + reg = info->reg; + bit = BIT(info->bit); if (sh_pfc_read_reg(pfc, PUEN + reg, 32) & bit) { if (sh_pfc_read_reg(pfc, PUD + reg, 32) & bit) @@ -5379,15 +5378,17 @@ static unsigned int r8a7795_pinmux_get_bias(struct sh_pfc *pfc, static void r8a7795_pinmux_set_bias(struct sh_pfc *pfc, unsigned int pin, unsigned int bias) { + const struct sh_pfc_bias_info *info; u32 enable, updown; u32 reg; u32 bit; - if (WARN_ON_ONCE(!pullups[pin].reg)) + info = sh_pfc_pin_to_bias_info(bias_info, ARRAY_SIZE(bias_info), pin); + if (!info) return; - reg = pullups[pin].reg; - bit = BIT(pullups[pin].bit); + reg = info->reg; + bit = BIT(info->bit); enable = sh_pfc_read_reg(pfc, PUEN + reg, 32) & ~bit; if (bias != PIN_CONFIG_BIAS_DISABLE) -- GitLab From 9229336861cbe0f0c81dbe884d5721b4d81c21b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Sat, 12 Nov 2016 17:04:25 +0100 Subject: [PATCH 0479/1442] pinctrl: sh-pfc: Add helper to handle bias lookup table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c314c9f15aa5f43f0e5c0e2602cc65798dbd1598 upstream. On some SoC there are no simple mapping of pins to bias register bits and a lookup table is needed. This logic is already implemented in some SoC specific drivers that could benefit from a generic implementation. Add helpers to deal with the lookup which later can be used by the SoC specific drivers. The logic used to lookup are different from the one it aims to replace, this is intentional. This new method reduces the memory consumption at the cost of increased CPU usage and fix a bug where a WARN() would incorrectly be triggered if the register offset is 0. Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sh-pfc/core.c | 15 +++++++++++++++ drivers/pinctrl/sh-pfc/core.h | 4 ++++ drivers/pinctrl/sh-pfc/sh_pfc.h | 6 ++++++ 3 files changed, 25 insertions(+) diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c index f3a8897d4e8f..cf80ce1dd7ce 100644 --- a/drivers/pinctrl/sh-pfc/core.c +++ b/drivers/pinctrl/sh-pfc/core.c @@ -389,6 +389,21 @@ int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type) return 0; } +const struct sh_pfc_bias_info * +sh_pfc_pin_to_bias_info(const struct sh_pfc_bias_info *info, + unsigned int num, unsigned int pin) +{ + unsigned int i; + + for (i = 0; i < num; i++) + if (info[i].pin == pin) + return &info[i]; + + WARN_ONCE(1, "Pin %u is not in bias info list\n", pin); + + return NULL; +} + static int sh_pfc_init_ranges(struct sh_pfc *pfc) { struct sh_pfc_pin_range *range; diff --git a/drivers/pinctrl/sh-pfc/core.h b/drivers/pinctrl/sh-pfc/core.h index 0bbdea5849f4..6d598dd63720 100644 --- a/drivers/pinctrl/sh-pfc/core.h +++ b/drivers/pinctrl/sh-pfc/core.h @@ -33,4 +33,8 @@ void sh_pfc_write_reg(struct sh_pfc *pfc, u32 reg, unsigned int width, int sh_pfc_get_pin_index(struct sh_pfc *pfc, unsigned int pin); int sh_pfc_config_mux(struct sh_pfc *pfc, unsigned mark, int pinmux_type); +const struct sh_pfc_bias_info * +sh_pfc_pin_to_bias_info(const struct sh_pfc_bias_info *info, + unsigned int num, unsigned int pin); + #endif /* __SH_PFC_CORE_H__ */ diff --git a/drivers/pinctrl/sh-pfc/sh_pfc.h b/drivers/pinctrl/sh-pfc/sh_pfc.h index 2345421103db..9556c172e3d2 100644 --- a/drivers/pinctrl/sh-pfc/sh_pfc.h +++ b/drivers/pinctrl/sh-pfc/sh_pfc.h @@ -189,6 +189,12 @@ struct sh_pfc_window { unsigned long size; }; +struct sh_pfc_bias_info { + u16 pin; + u16 reg : 11; + u16 bit : 5; +}; + struct sh_pfc_pin_range; struct sh_pfc { -- GitLab From 8ac055af47aef130a124527aebecb25172107d0b Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Thu, 1 Dec 2016 10:44:16 -0600 Subject: [PATCH 0480/1442] regulator: tps65086: Fix 25mV ranges for BUCK regulators commit d8ca5bd158f738c4fa6974ee388c381f64db7905 upstream. The BUCK regulators 3, 4, and 5 also have a 10mV step mode, adjust the tables and logic to reflect the data-sheet for these regulators. fixes: d2a2e729a666 ("regulator: tps65086: Add regulator driver for the TPS65086 PMIC") Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/mfd/tps65086.txt | 2 +- drivers/regulator/tps65086-regulator.c | 54 ++++++++++--------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/tps65086.txt b/Documentation/devicetree/bindings/mfd/tps65086.txt index d3705612a846..9cfa886fe99f 100644 --- a/Documentation/devicetree/bindings/mfd/tps65086.txt +++ b/Documentation/devicetree/bindings/mfd/tps65086.txt @@ -23,7 +23,7 @@ Required properties: defined below. Optional regulator properties: - - ti,regulator-step-size-25mv : This is applicable for buck[1,2,6], set this + - ti,regulator-step-size-25mv : This is applicable for buck[1-6], set this if the regulator is factory set with a 25mv step voltage mapping. - ti,regulator-decay : This is applicable for buck[1-6], set this if diff --git a/drivers/regulator/tps65086-regulator.c b/drivers/regulator/tps65086-regulator.c index 33f389d583ef..caf174ffa316 100644 --- a/drivers/regulator/tps65086-regulator.c +++ b/drivers/regulator/tps65086-regulator.c @@ -71,18 +71,17 @@ struct tps65086_regulator { unsigned int decay_mask; }; -static const struct regulator_linear_range tps65086_buck126_10mv_ranges[] = { +static const struct regulator_linear_range tps65086_10mv_ranges[] = { REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0), REGULATOR_LINEAR_RANGE(410000, 0x1, 0x7F, 10000), }; static const struct regulator_linear_range tps65086_buck126_25mv_ranges[] = { - REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0), - REGULATOR_LINEAR_RANGE(1000000, 0x1, 0x18, 0), + REGULATOR_LINEAR_RANGE(1000000, 0x0, 0x18, 0), REGULATOR_LINEAR_RANGE(1025000, 0x19, 0x7F, 25000), }; -static const struct regulator_linear_range tps65086_buck345_ranges[] = { +static const struct regulator_linear_range tps65086_buck345_25mv_ranges[] = { REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0), REGULATOR_LINEAR_RANGE(425000, 0x1, 0x7F, 25000), }; @@ -125,27 +124,27 @@ static int tps65086_of_parse_cb(struct device_node *dev, static struct tps65086_regulator regulators[] = { TPS65086_REGULATOR("BUCK1", "buck1", BUCK1, 0x80, TPS65086_BUCK1CTRL, BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(0), - tps65086_buck126_10mv_ranges, TPS65086_BUCK1CTRL, + tps65086_10mv_ranges, TPS65086_BUCK1CTRL, BIT(0)), TPS65086_REGULATOR("BUCK2", "buck2", BUCK2, 0x80, TPS65086_BUCK2CTRL, BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(1), - tps65086_buck126_10mv_ranges, TPS65086_BUCK2CTRL, + tps65086_10mv_ranges, TPS65086_BUCK2CTRL, BIT(0)), TPS65086_REGULATOR("BUCK3", "buck3", BUCK3, 0x80, TPS65086_BUCK3VID, BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(2), - tps65086_buck345_ranges, TPS65086_BUCK3DECAY, + tps65086_10mv_ranges, TPS65086_BUCK3DECAY, BIT(0)), TPS65086_REGULATOR("BUCK4", "buck4", BUCK4, 0x80, TPS65086_BUCK4VID, BUCK_VID_MASK, TPS65086_BUCK4CTRL, BIT(0), - tps65086_buck345_ranges, TPS65086_BUCK4VID, + tps65086_10mv_ranges, TPS65086_BUCK4VID, BIT(0)), TPS65086_REGULATOR("BUCK5", "buck5", BUCK5, 0x80, TPS65086_BUCK5VID, BUCK_VID_MASK, TPS65086_BUCK5CTRL, BIT(0), - tps65086_buck345_ranges, TPS65086_BUCK5CTRL, + tps65086_10mv_ranges, TPS65086_BUCK5CTRL, BIT(0)), TPS65086_REGULATOR("BUCK6", "buck6", BUCK6, 0x80, TPS65086_BUCK6VID, BUCK_VID_MASK, TPS65086_BUCK6CTRL, BIT(0), - tps65086_buck126_10mv_ranges, TPS65086_BUCK6CTRL, + tps65086_10mv_ranges, TPS65086_BUCK6CTRL, BIT(0)), TPS65086_REGULATOR("LDOA1", "ldoa1", LDOA1, 0xF, TPS65086_LDOA1CTRL, VDOA1_VID_MASK, TPS65086_LDOA1CTRL, BIT(0), @@ -162,18 +161,6 @@ static struct tps65086_regulator regulators[] = { TPS65086_SWITCH("VTT", "vtt", VTT, TPS65086_SWVTT_EN, BIT(4)), }; -static inline bool has_25mv_mode(int id) -{ - switch (id) { - case BUCK1: - case BUCK2: - case BUCK6: - return true; - default: - return false; - } -} - static int tps65086_of_parse_cb(struct device_node *dev, const struct regulator_desc *desc, struct regulator_config *config) @@ -181,12 +168,27 @@ static int tps65086_of_parse_cb(struct device_node *dev, int ret; /* Check for 25mV step mode */ - if (has_25mv_mode(desc->id) && - of_property_read_bool(config->of_node, "ti,regulator-step-size-25mv")) { - regulators[desc->id].desc.linear_ranges = + if (of_property_read_bool(config->of_node, "ti,regulator-step-size-25mv")) { + switch (desc->id) { + case BUCK1: + case BUCK2: + case BUCK6: + regulators[desc->id].desc.linear_ranges = tps65086_buck126_25mv_ranges; - regulators[desc->id].desc.n_linear_ranges = + regulators[desc->id].desc.n_linear_ranges = ARRAY_SIZE(tps65086_buck126_25mv_ranges); + break; + case BUCK3: + case BUCK4: + case BUCK5: + regulators[desc->id].desc.linear_ranges = + tps65086_buck345_25mv_ranges; + regulators[desc->id].desc.n_linear_ranges = + ARRAY_SIZE(tps65086_buck345_25mv_ranges); + break; + default: + dev_warn(config->dev, "25mV step mode only valid for BUCK regulators\n"); + } } /* Check for decay mode */ -- GitLab From 6b94626c9edfb434a779d59d8409c982d206b25d Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 11 Nov 2016 11:12:43 +0800 Subject: [PATCH 0481/1442] regulator: axp20x: Fix axp809 ldo_io registration error on cold boot commit 618c808968852609d2d9f0e5cfc351a4807ef8d0 upstream. The maximum supported voltage for ldo_io# is 3.3V, but on cold boot the selector comes up at 0x1f, which maps to 3.8V. This was previously corrected by Allwinner's U-boot, which set all regulators on the PMICs to some pre-configured voltage. With recent progress in U-boot SPL support, this is no longer the case. In any case we should handle this quirk in the kernel driver as well. This invalid setting causes _regulator_get_voltage() to fail with -EINVAL which causes regulator registration to fail when constrains are used: [ 1.054181] vcc-pg: failed to get the current voltage(-22) [ 1.059670] axp20x-regulator axp20x-regulator.0: Failed to register ldo_io0 [ 1.069749] axp20x-regulator: probe of axp20x-regulator.0 failed with error -22 This commits makes the axp20x regulator driver accept the 0x1f register value, fixing this. The datasheet does not guarantee reliable operation above 3.3V, so on boards where this regulator is used the regulator-max-microvolt setting must be 3.3V or less. This is essentially the same as the commit f40d4896bf32 ("regulator: axp20x: Fix axp22x ldo_io registration error on cold boot") for AXP22x PMICs. Fixes: a51f9f4622a3 ("regulator: axp20x: support AXP809 variant") Signed-off-by: Chen-Yu Tsai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/axp20x-regulator.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 54382ef902c6..e6a512ebeae2 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -337,10 +337,18 @@ static const struct regulator_desc axp809_regulators[] = { AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)), AXP_DESC(AXP809, ELDO3, "eldo3", "eldoin", 700, 3300, 100, AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)), - AXP_DESC_IO(AXP809, LDO_IO0, "ldo_io0", "ips", 700, 3300, 100, + /* + * Note the datasheet only guarantees reliable operation up to + * 3.3V, this needs to be enforced via dts provided constraints + */ + AXP_DESC_IO(AXP809, LDO_IO0, "ldo_io0", "ips", 700, 3800, 100, AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), - AXP_DESC_IO(AXP809, LDO_IO1, "ldo_io1", "ips", 700, 3300, 100, + /* + * Note the datasheet only guarantees reliable operation up to + * 3.3V, this needs to be enforced via dts provided constraints + */ + AXP_DESC_IO(AXP809, LDO_IO1, "ldo_io1", "ips", 700, 3800, 100, AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), AXP_DESC_FIXED(AXP809, RTC_LDO, "rtc_ldo", "ips", 1800), -- GitLab From a63bb198dc14b43558618bac461936661af74450 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 28 Oct 2016 11:09:45 +0200 Subject: [PATCH 0482/1442] drm/tegra: dpaux: Fix error handling commit 9376cad2073d2c122864754ea5f80025c8507b0b upstream. The devm_pinctrl_register() function returns an error pointer or a valid handle. So checking for NULL here is pointless and can never trigger. Check the returned value with IS_ERR instead and propagate this value as done in the other functions which call devm_pinctrl_register(). Fixes: 0751bb5c44fe ("drm/tegra: dpaux: Add pinctrl support") Signed-off-by: Christophe JAILLET Acked-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/tegra/dpaux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index 059f409556d5..2fde44c3a1b3 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -539,9 +539,9 @@ static int tegra_dpaux_probe(struct platform_device *pdev) dpaux->desc.owner = THIS_MODULE; dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux); - if (!dpaux->pinctrl) { + if (IS_ERR(dpaux->pinctrl)) { dev_err(&pdev->dev, "failed to register pincontrol\n"); - return -ENODEV; + return PTR_ERR(dpaux->pinctrl); } #endif /* enable and clear all interrupts */ -- GitLab From c730a84aff6f3eca7dbe8b5e7bbc3cdd94418a54 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Oct 2016 11:54:31 +0300 Subject: [PATCH 0483/1442] drm/vc4: Fix a couple error codes in vc4_cl_lookup_bos() commit b2cdeb19f16ad984eb5bb9193f793d05a8101511 upstream. If the allocation fails the current code returns success. If copy_from_user() fails it returns the number of bytes remaining instead of -EFAULT. Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Dan Carpenter Reviewed-by: Eric Anholt Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_gem.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 47a095f392f8..303f23c96220 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -544,14 +544,15 @@ vc4_cl_lookup_bos(struct drm_device *dev, handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t)); if (!handles) { + ret = -ENOMEM; DRM_ERROR("Failed to allocate incoming GEM handles\n"); goto fail; } - ret = copy_from_user(handles, - (void __user *)(uintptr_t)args->bo_handles, - exec->bo_count * sizeof(uint32_t)); - if (ret) { + if (copy_from_user(handles, + (void __user *)(uintptr_t)args->bo_handles, + exec->bo_count * sizeof(uint32_t))) { + ret = -EFAULT; DRM_ERROR("Failed to copy in GEM handles\n"); goto fail; } -- GitLab From d65146c7fb91797d14edcd092a07d4432688f7d5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:22:27 +0300 Subject: [PATCH 0484/1442] drm/savage: dereferencing an error pointer commit f7741aa75e76440f4e9ecfe512feebe9bce33ca8 upstream. A recent cleanup changed the kmalloc() + copy_from_user() to memdup_user() but the error handling wasn't updated so we might call kfree(-EFAULT) and crash. Fixes: a6e3918bcdb1 ('GPU-DRM-Savage: Use memdup_user() rather than duplicating') Signed-off-by: Dan Carpenter Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161012062227.GU12841@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/savage/savage_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/savage/savage_state.c b/drivers/gpu/drm/savage/savage_state.c index 3dc0d8ff95ec..2db89bed52e8 100644 --- a/drivers/gpu/drm/savage/savage_state.c +++ b/drivers/gpu/drm/savage/savage_state.c @@ -1004,6 +1004,7 @@ int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_ kvb_addr = memdup_user(cmdbuf->vb_addr, cmdbuf->vb_size); if (IS_ERR(kvb_addr)) { ret = PTR_ERR(kvb_addr); + kvb_addr = NULL; goto done; } cmdbuf->vb_addr = kvb_addr; -- GitLab From 91ee732cb5632b0546d514daed6a72b7ed516f7b Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 14 Dec 2016 11:59:57 +0100 Subject: [PATCH 0485/1442] selftests: do not require bash to run netsocktests testcase commit 3659f98b5375d195f1870c3e508fe51e52206839 upstream. Nothing in this minimal script seems to require bash. We often run these tests on embedded devices where the only shell available is the busybox ash. Use sh instead. Signed-off-by: Rolf Eike Beer Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/run_netsocktests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index c09a682df56a..16058bbea7a8 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh echo "--------------------" echo "running socket test" -- GitLab From 057ac4429aef93831aa90daae1fad840297274fa Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 14 Dec 2016 11:59:34 +0100 Subject: [PATCH 0486/1442] selftests: do not require bash for the generated test commit a2b1e8a20c992b01eeb76de00d4f534cbe9f3822 upstream. Nothing in this minimal script seems to require bash. We often run these tests on embedded devices where the only shell available is the busybox ash. Use sh instead. Signed-off-by: Rolf Eike Beer Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f770dba2a6f6..a899ef81c705 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -87,7 +87,7 @@ ifdef INSTALL_PATH done; @# Ask all targets to emit their test scripts - echo "#!/bin/bash" > $(ALL_SCRIPT) + echo "#!/bin/sh" > $(ALL_SCRIPT) echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) -- GitLab From ad4764b4c8ebb48da0680ac0e7b20d4f49fc6cd1 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 10 Jan 2017 16:58:18 -0800 Subject: [PATCH 0487/1442] zram: revalidate disk under init_lock commit e7ccfc4ccb703e0f033bd4617580039898e912dd upstream. Commit b4c5c60920e3 ("zram: avoid lockdep splat by revalidate_disk") moved revalidate_disk call out of init_lock to avoid lockdep false-positive splat. However, commit 08eee69fcf6b ("zram: remove init_lock in zram_make_request") removed init_lock in IO path so there is no worry about lockdep splat. So, let's restore it. This patch is needed to set BDI_CAP_STABLE_WRITES atomically in next patch. Fixes: da9556a2367c ("zram: user per-cpu compression streams") Link: http://lkml.kernel.org/r/1482366980-3782-3-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Takashi Iwai Cc: Hyeoncheol Lee Cc: Cc: Sangseok Lee Cc: Hugh Dickins Cc: Darrick J. Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 5497f7fc44d0..c0b95dd36101 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1094,14 +1094,8 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - up_write(&zram->init_lock); - - /* - * Revalidate disk out of the init_lock to avoid lockdep splat. - * It's okay because disk's capacity is protected by init_lock - * so that revalidate_disk always sees up-to-date capacity. - */ revalidate_disk(zram->disk); + up_write(&zram->init_lock); return len; -- GitLab From 2e264fb546fa5eade611dca09d8734e3232ff9b2 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 10 Jan 2017 16:58:21 -0800 Subject: [PATCH 0488/1442] zram: support BDI_CAP_STABLE_WRITES commit b09ab054b69b07077bd3292f67e777861ac796e5 upstream. zram has used per-cpu stream feature from v4.7. It aims for increasing cache hit ratio of scratch buffer for compressing. Downside of that approach is that zram should ask memory space for compressed page in per-cpu context which requires stricted gfp flag which could be failed. If so, it retries to allocate memory space out of per-cpu context so it could get memory this time and compress the data again, copies it to the memory space. In this scenario, zram assumes the data should never be changed but it is not true without stable page support. So, If the data is changed under us, zram can make buffer overrun so that zsmalloc free object chain is broken so system goes crash like below https://bugzilla.suse.com/show_bug.cgi?id=997574 This patch adds BDI_CAP_STABLE_WRITES to zram for declaring "I am block device needing *stable write*". Fixes: da9556a2367c ("zram: user per-cpu compression streams") Link: http://lkml.kernel.org/r/1482366980-3782-4-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Takashi Iwai Cc: Hyeoncheol Lee Cc: Cc: Sangseok Lee Cc: Hugh Dickins Cc: Darrick J. Wong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c0b95dd36101..d2ef51ca9cf4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -111,6 +112,14 @@ static inline bool is_partial_io(struct bio_vec *bvec) return bvec->bv_len != PAGE_SIZE; } +static void zram_revalidate_disk(struct zram *zram) +{ + revalidate_disk(zram->disk); + /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ + zram->disk->queue->backing_dev_info.capabilities |= + BDI_CAP_STABLE_WRITES; +} + /* * Check if request is within bounds and aligned on zram logical blocks. */ @@ -1094,7 +1103,7 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - revalidate_disk(zram->disk); + zram_revalidate_disk(zram); up_write(&zram->init_lock); return len; @@ -1142,7 +1151,7 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - revalidate_disk(zram->disk); + zram_revalidate_disk(zram); bdput(bdev); mutex_lock(&bdev->bd_mutex); -- GitLab From 87fa6f37fa29565f13a1db0cdcf8ad2d0eb0f76e Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 10 Jan 2017 16:57:15 -0800 Subject: [PATCH 0489/1442] dax: fix deadlock with DAX 4k holes commit 965d004af54088d138f806d04d803fb60d441986 upstream. Currently in DAX if we have three read faults on the same hole address we can end up with the following: Thread 0 Thread 1 Thread 2 -------- -------- -------- dax_iomap_fault grab_mapping_entry lock_slot dax_iomap_fault grab_mapping_entry get_unlocked_mapping_entry dax_iomap_fault grab_mapping_entry get_unlocked_mapping_entry dax_load_hole find_or_create_page ... page_cache_tree_insert dax_wake_mapping_entry_waiter __radix_tree_replace get_page lock_page ... put_locked_mapping_entry unlock_page put_page The crux of the problem is that once we insert a 4k zero page, all locking from then on is done in terms of that 4k zero page and any additional threads sleeping on the empty DAX entry will never be woken. Fix this by waking all sleepers when we replace the DAX radix tree entry with a 4k zero page. This will allow all sleeping threads to successfully transition from locking based on the DAX empty entry to locking on the 4k zero page. With the test case reported by Xiong this happens very regularly in my test setup, with some runs resulting in 9+ threads in this deadlocked state. With this fix I've been able to run that same test dozens of times in a loop without issue. Fixes: ac401cc78242 ("dax: New fault locking") Link: http://lkml.kernel.org/r/1483479365-13607-1-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: Xiong Zhou Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 9a50acecc473..779801092ef1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -144,7 +144,7 @@ static int page_cache_tree_insert(struct address_space *mapping, workingset_node_pages_dec(node); /* Wakeup waiters for exceptional entry lock */ dax_wake_mapping_entry_waiter(mapping, page->index, - false); + true); } } radix_tree_replace_slot(slot, page); -- GitLab From 8edd365ee94ca4541270ee00b90705c634a085c6 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 10 Jan 2017 16:57:51 -0800 Subject: [PATCH 0490/1442] mm: pmd dirty emulation in page fault handler commit 20f664aabeb88d582b623a625f83b0454fa34f07 upstream. Andreas reported [1] made a test in jemalloc hang in THP mode in arm64: http://lkml.kernel.org/r/mvmmvfy37g1.fsf@hawking.suse.de The problem is currently page fault handler doesn't supports dirty bit emulation of pmd for non-HW dirty-bit architecture so that application stucks until VM marked the pmd dirty. How the emulation work depends on the architecture. In case of arm64, when it set up pte firstly, it sets pte PTE_RDONLY to get a chance to mark the pte dirty via triggering page fault when store access happens. Once the page fault occurs, VM marks the pmd dirty and arch code for setting pmd will clear PTE_RDONLY for application to proceed. IOW, if VM doesn't mark the pmd dirty, application hangs forever by repeated fault(i.e., store op but the pmd is PTE_RDONLY). This patch enables pmd dirty-bit emulation for those architectures. [1] b8d3c4c3009d, mm/huge_memory.c: don't split THP page when MADV_FREE syscall is called Fixes: b8d3c4c3009d ("mm/huge_memory.c: don't split THP page when MADV_FREE syscall is called") Link: http://lkml.kernel.org/r/1482506098-6149-1-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Reported-by: Andreas Schwab Tested-by: Andreas Schwab Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Jason Evans Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d4a6e4001512..8ca40b70beae 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -872,15 +872,17 @@ void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd) { pmd_t entry; unsigned long haddr; + bool write = fe->flags & FAULT_FLAG_WRITE; fe->ptl = pmd_lock(fe->vma->vm_mm, fe->pmd); if (unlikely(!pmd_same(*fe->pmd, orig_pmd))) goto unlock; entry = pmd_mkyoung(orig_pmd); + if (write) + entry = pmd_mkdirty(entry); haddr = fe->address & HPAGE_PMD_MASK; - if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry, - fe->flags & FAULT_FLAG_WRITE)) + if (pmdp_set_access_flags(fe->vma, haddr, fe->pmd, entry, write)) update_mmu_cache_pmd(fe->vma, fe->address, fe->pmd); unlock: -- GitLab From 692755b1006d1e603ae7b86e40a90fcdfb166209 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 10 Jan 2017 16:57:36 -0800 Subject: [PATCH 0491/1442] mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done} commit f931ab479dd24cf7a2c6e2df19778406892591fb upstream. Both arch_add_memory() and arch_remove_memory() expect a single threaded context. For example, arch/x86/mm/init_64.c::kernel_physical_mapping_init() does not hold any locks over this check and branch: if (pgd_val(*pgd)) { pud = (pud_t *)pgd_page_vaddr(*pgd); paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end), page_size_mask); continue; } pud = alloc_low_page(); paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end), page_size_mask); The result is that two threads calling devm_memremap_pages() simultaneously can end up colliding on pgd initialization. This leads to crash signatures like the following where the loser of the race initializes the wrong pgd entry: BUG: unable to handle kernel paging request at ffff888ebfff0000 IP: memcpy_erms+0x6/0x10 PGD 2f8e8fc067 PUD 0 /* <---- Invalid PUD */ Oops: 0000 [#1] SMP DEBUG_PAGEALLOC CPU: 54 PID: 3818 Comm: systemd-udevd Not tainted 4.6.7+ #13 task: ffff882fac290040 ti: ffff882f887a4000 task.ti: ffff882f887a4000 RIP: memcpy_erms+0x6/0x10 [..] Call Trace: ? pmem_do_bvec+0x205/0x370 [nd_pmem] ? blk_queue_enter+0x3a/0x280 pmem_rw_page+0x38/0x80 [nd_pmem] bdev_read_page+0x84/0xb0 Hold the standard memory hotplug mutex over calls to arch_{add,remove}_memory(). Fixes: 41e94a851304 ("add devm_memremap_pages") Link: http://lkml.kernel.org/r/148357647831.9498.12606007370121652979.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/memremap.c b/kernel/memremap.c index b501e390bb34..9ecedc28b928 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -246,7 +246,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); + mem_hotplug_begin(); arch_remove_memory(align_start, align_size); + mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, @@ -358,7 +360,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; + mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); + mem_hotplug_done(); if (error) goto err_add_memory; -- GitLab From 6c9bd81cb9eab3508227186f3e036d38146d2c0d Mon Sep 17 00:00:00 2001 From: Eric Ren Date: Tue, 10 Jan 2017 16:57:33 -0800 Subject: [PATCH 0492/1442] ocfs2: fix crash caused by stale lvb with fsdlm plugin commit e7ee2c089e94067d68475990bdeed211c8852917 upstream. The crash happens rather often when we reset some cluster nodes while nodes contend fiercely to do truncate and append. The crash backtrace is below: dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1 locks on 971 resources dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9 generation 5 done: 4 ms ocfs2: Begin replay journal (node 318952601, slot 2) on device (253,18) ocfs2: End replay journal (node 318952601, slot 2) on device (253,18) ocfs2: Beginning quota recovery on device (253,18) for slot 2 ocfs2: Finishing quota recovery on device (253,18) for slot 2 (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug expression: le64_to_cpu(fe->i_size) != i_size_read(inode) (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode 290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1 ------------[ cut here ]------------ kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470! invalid opcode: 0000 [#1] SMP Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN) ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse sd_mod iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr parport joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio ehci_hcd usbcore serio_raw usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4 Supported: No, Unsupported modules are loaded CPU: 1 PID: 30154 Comm: truncate Tainted: G OE N 4.4.21-69-default #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014 task: ffff88004ff6d240 ti: ffff880074e68000 task.ti: ffff880074e68000 RIP: 0010:[] [] ocfs2_truncate_file+0x640/0x6c0 [ocfs2] RSP: 0018:ffff880074e6bd50 EFLAGS: 00010282 RAX: 0000000000000074 RBX: 000000000000029e RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000246 RDI: 0000000000000246 RBP: ffff880074e6bda8 R08: 000000003675dc7a R09: ffffffff82013414 R10: 0000000000034c50 R11: 0000000000000000 R12: ffff88003aab3448 R13: 00000000000002dc R14: 0000000000046e11 R15: 0000000000000020 FS: 00007f839f965700(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f839f97e000 CR3: 0000000036723000 CR4: 00000000000006e0 Call Trace: ocfs2_setattr+0x698/0xa90 [ocfs2] notify_change+0x1ae/0x380 do_truncate+0x5e/0x90 do_sys_ftruncate.constprop.11+0x108/0x160 entry_SYSCALL_64_fastpath+0x12/0x6d Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89 44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff <0f> 0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff RIP [] ocfs2_truncate_file+0x640/0x6c0 [ocfs2] It's because ocfs2_inode_lock() get us stale LVB in which the i_size is not equal to the disk i_size. We mistakenly trust the LVB because the underlaying fsdlm dlm_lock() doesn't set lkb_sbflags with DLM_SBF_VALNOTVALID properly for us. But, why? The current code tries to downconvert lock without DLM_LKF_VALBLK flag to tell o2cb don't update RSB's LVB if it's a PR->NULL conversion, even if the lock resource type needs LVB. This is not the right way for fsdlm. The fsdlm plugin behaves different on DLM_LKF_VALBLK, it depends on DLM_LKF_VALBLK to decide if we care about the LVB in the LKB. If DLM_LKF_VALBLK is not set, fsdlm will skip recovering RSB's LVB from this lkb and set the right DLM_SBF_VALNOTVALID appropriately when node failure happens. The following diagram briefly illustrates how this crash happens: RSB1 is inode metadata lock resource with LOCK_TYPE_USES_LVB; The 1st round: Node1 Node2 RSB1: PR RSB1(master): NULL->EX ocfs2_downconvert_lock(PR->NULL, set_lvb==0) ocfs2_dlm_lock(no DLM_LKF_VALBLK) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - dlm_lock(no DLM_LKF_VALBLK) convert_lock(overwrite lkb->lkb_exflags with no DLM_LKF_VALBLK) RSB1: NULL RSB1: EX reset Node2 dlm_recover_rsbs() recover_lvb() /* The LVB is not trustable if the node with EX fails and * no lock >= PR is left. We should set RSB_VALNOTVALID for RSB1. */ if(!(kb_exflags & DLM_LKF_VALBLK)) /* This means we miss the chance to return; * to invalid the LVB here. */ The 2nd round: Node 1 Node2 RSB1(become master from recovery) ocfs2_setattr() ocfs2_inode_lock(NULL->EX) /* dlm_lock() return the stale lvb without setting DLM_SBF_VALNOTVALID */ ocfs2_meta_lvb_is_trustable() return 1 /* so we don't refresh inode from disk */ ocfs2_truncate_file() mlog_bug_on_msg(disk isize != i_size_read(inode)) /* crash! */ The fix is quite straightforward. We keep to set DLM_LKF_VALBLK flag for dlm_lock() if the lock resource type needs LVB and the fsdlm plugin is uesed. Link: http://lkml.kernel.org/r/1481275846-6604-1-git-send-email-zren@suse.com Signed-off-by: Eric Ren Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlmglue.c | 10 ++++++++++ fs/ocfs2/stackglue.c | 6 ++++++ fs/ocfs2/stackglue.h | 3 +++ 3 files changed, 19 insertions(+) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 83d576f6a287..77d1632e905d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, lockres->l_level, new_level); + /* + * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always + * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that + * we can recover correctly from node failure. Otherwise, we may get + * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. + */ + if (!ocfs2_is_o2cb_active() && + lockres->l_ops->flags & LOCK_TYPE_USES_LVB) + lvb = 1; + if (lvb) dlm_flags |= DLM_LKF_VALBLK; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 52c07346bea3..820359096c7a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; */ static struct ocfs2_stack_plugin *active_stack; +inline int ocfs2_is_o2cb_active(void) +{ + return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); +} +EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); + static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) { struct ocfs2_stack_plugin *p; diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index f2dce10fae54..e3036e1790e8 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); +/* In ocfs2_downconvert_lock(), we need to know which stack we are using */ +int ocfs2_is_o2cb_active(void); + extern struct kset *ocfs2_kset; #endif /* STACKGLUE_H */ -- GitLab From 07fc9575e88aae0ac82997b282343b1ed757957e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 10 Jan 2017 16:58:04 -0800 Subject: [PATCH 0493/1442] mm, memcg: fix the active list aging for lowmem requests when memcg is enabled commit b4536f0c829c8586544c94735c343f9b5070bd01 upstream. Nils Holland and Klaus Ethgen have reported unexpected OOM killer invocations with 32b kernel starting with 4.8 kernels kworker/u4:5 invoked oom-killer: gfp_mask=0x2400840(GFP_NOFS|__GFP_NOFAIL), nodemask=0, order=0, oom_score_adj=0 kworker/u4:5 cpuset=/ mems_allowed=0 CPU: 1 PID: 2603 Comm: kworker/u4:5 Not tainted 4.9.0-gentoo #2 [...] Mem-Info: active_anon:58685 inactive_anon:90 isolated_anon:0 active_file:274324 inactive_file:281962 isolated_file:0 unevictable:0 dirty:649 writeback:0 unstable:0 slab_reclaimable:40662 slab_unreclaimable:17754 mapped:7382 shmem:202 pagetables:351 bounce:0 free:206736 free_pcp:332 free_cma:0 Node 0 active_anon:234740kB inactive_anon:360kB active_file:1097296kB inactive_file:1127848kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:29528kB dirty:2596kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 184320kB anon_thp: 808kB writeback_tmp:0kB unstable:0kB pages_scanned:0 all_unreclaimable? no DMA free:3952kB min:788kB low:984kB high:1180kB active_anon:0kB inactive_anon:0kB active_file:7316kB inactive_file:0kB unevictable:0kB writepending:96kB present:15992kB managed:15916kB mlocked:0kB slab_reclaimable:3200kB slab_unreclaimable:1408kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 813 3474 3474 Normal free:41332kB min:41368kB low:51708kB high:62048kB active_anon:0kB inactive_anon:0kB active_file:532748kB inactive_file:44kB unevictable:0kB writepending:24kB present:897016kB managed:836248kB mlocked:0kB slab_reclaimable:159448kB slab_unreclaimable:69608kB kernel_stack:1112kB pagetables:1404kB bounce:0kB free_pcp:528kB local_pcp:340kB free_cma:0kB lowmem_reserve[]: 0 0 21292 21292 HighMem free:781660kB min:512kB low:34356kB high:68200kB active_anon:234740kB inactive_anon:360kB active_file:557232kB inactive_file:1127804kB unevictable:0kB writepending:2592kB present:2725384kB managed:2725384kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:800kB local_pcp:608kB free_cma:0kB the oom killer is clearly pre-mature because there there is still a lot of page cache in the zone Normal which should satisfy this lowmem request. Further debugging has shown that the reclaim cannot make any forward progress because the page cache is hidden in the active list which doesn't get rotated because inactive_list_is_low is not memcg aware. The code simply subtracts per-zone highmem counters from the respective memcg's lru sizes which doesn't make any sense. We can simply end up always seeing the resulting active and inactive counts 0 and return false. This issue is not limited to 32b kernels but in practice the effect on systems without CONFIG_HIGHMEM would be much harder to notice because we do not invoke the OOM killer for allocations requests targeting < ZONE_NORMAL. Fix the issue by tracking per zone lru page counts in mem_cgroup_per_node and subtract per-memcg highmem counts when memcg is enabled. Introduce helper lruvec_zone_lru_size which redirects to either zone counters or mem_cgroup_get_zone_lru_size when appropriate. We are losing empty LRU but non-zero lru size detection introduced by ca707239e8a7 ("mm: update_lru_size warn and reset bad lru_size") because of the inherent zone vs. node discrepancy. Fixes: f8d1a31163fc ("mm: consider whether to decivate based on eligible zones inactive ratio") Link: http://lkml.kernel.org/r/20170104100825.3729-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Nils Holland Tested-by: Nils Holland Reported-by: Klaus Ethgen Acked-by: Minchan Kim Acked-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/memcontrol.h | 26 +++++++++++++++++++++++--- include/linux/mm_inline.h | 2 +- mm/memcontrol.c | 18 ++++++++---------- mm/vmscan.c | 27 +++++++++++++++++---------- 4 files changed, 49 insertions(+), 24 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 61d20c17f3b7..254698856b8f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -120,7 +120,7 @@ struct mem_cgroup_reclaim_iter { */ struct mem_cgroup_per_node { struct lruvec lruvec; - unsigned long lru_size[NR_LRU_LISTS]; + unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; @@ -432,7 +432,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg) int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, - int nr_pages); + int zid, int nr_pages); unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask); @@ -441,9 +441,23 @@ static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { struct mem_cgroup_per_node *mz; + unsigned long nr_pages = 0; + int zid; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - return mz->lru_size[lru]; + for (zid = 0; zid < MAX_NR_ZONES; zid++) + nr_pages += mz->lru_zone_size[zid][lru]; + return nr_pages; +} + +static inline +unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, + enum lru_list lru, int zone_idx) +{ + struct mem_cgroup_per_node *mz; + + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + return mz->lru_zone_size[zone_idx][lru]; } void mem_cgroup_handle_over_high(void); @@ -671,6 +685,12 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { return 0; } +static inline +unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, + enum lru_list lru, int zone_idx) +{ + return 0; +} static inline unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 71613e8a720f..41d376e7116d 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -39,7 +39,7 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, { __update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG - mem_cgroup_update_lru_size(lruvec, lru, nr_pages); + mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0f870ba43942..d536a9daa511 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -625,8 +625,8 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask) { + struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg); unsigned long nr = 0; - struct mem_cgroup_per_node *mz; enum lru_list lru; VM_BUG_ON((unsigned)nid >= nr_node_ids); @@ -634,8 +634,7 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) continue; - mz = mem_cgroup_nodeinfo(memcg, nid); - nr += mz->lru_size[lru]; + nr += mem_cgroup_get_lru_size(lruvec, lru); } return nr; } @@ -1002,6 +1001,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd * mem_cgroup_update_lru_size - account for adding or removing an lru page * @lruvec: mem_cgroup per zone lru vector * @lru: index of lru list the page is sitting on + * @zid: zone id of the accounted pages * @nr_pages: positive when adding or negative when removing * * This function must be called under lru_lock, just before a page is added @@ -1009,27 +1009,25 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd * so as to allow it to check that lru_size 0 is consistent with list_empty). */ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, - int nr_pages) + int zid, int nr_pages) { struct mem_cgroup_per_node *mz; unsigned long *lru_size; long size; - bool empty; if (mem_cgroup_disabled()) return; mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - lru_size = mz->lru_size + lru; - empty = list_empty(lruvec->lists + lru); + lru_size = &mz->lru_zone_size[zid][lru]; if (nr_pages < 0) *lru_size += nr_pages; size = *lru_size; - if (WARN_ONCE(size < 0 || empty != !size, - "%s(%p, %d, %d): lru_size %ld but %sempty\n", - __func__, lruvec, lru, nr_pages, size, empty ? "" : "not ")) { + if (WARN_ONCE(size < 0, + "%s(%p, %d, %d): lru_size %ld\n", + __func__, lruvec, lru, nr_pages, size)) { VM_BUG_ON(1); *lru_size = 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index c4abf08861d2..fa30010a5277 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -242,6 +242,16 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); } +unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru, + int zone_idx) +{ + if (!mem_cgroup_disabled()) + return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx); + + return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx], + NR_ZONE_LRU_BASE + lru); +} + /* * Add a shrinker callback to be called from the vm. */ @@ -1382,8 +1392,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) * be complete before mem_cgroup_update_lru_size due to a santity check. */ static __always_inline void update_lru_sizes(struct lruvec *lruvec, - enum lru_list lru, unsigned long *nr_zone_taken, - unsigned long nr_taken) + enum lru_list lru, unsigned long *nr_zone_taken) { int zid; @@ -1392,11 +1401,11 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, continue; __update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); - } - #ifdef CONFIG_MEMCG - mem_cgroup_update_lru_size(lruvec, lru, -nr_taken); + mem_cgroup_update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); #endif + } + } /* @@ -1501,7 +1510,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, *nr_scanned = scan; trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan, nr_taken, mode, is_file_lru(lru)); - update_lru_sizes(lruvec, lru, nr_zone_taken, nr_taken); + update_lru_sizes(lruvec, lru, nr_zone_taken); return nr_taken; } @@ -2047,10 +2056,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, if (!managed_zone(zone)) continue; - inactive_zone = zone_page_state(zone, - NR_ZONE_LRU_BASE + (file * LRU_FILE)); - active_zone = zone_page_state(zone, - NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE); + inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid); + active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid); inactive -= min(inactive, inactive_zone); active -= min(active, active_zone); -- GitLab From 6ca29ee3ca0dd517f616cb4eae0b7c83701b8f4e Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 10 Jan 2017 16:58:15 -0800 Subject: [PATCH 0494/1442] mm: support anonymous stable page commit f05714293a591038304ddae7cb0dd747bb3786cc upstream. During developemnt for zram-swap asynchronous writeback, I found strange corruption of compressed page, resulting in: Modules linked in: zram(E) CPU: 3 PID: 1520 Comm: zramd-1 Tainted: G E 4.8.0-mm1-00320-ge0d4894c9c38-dirty #3274 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 task: ffff88007620b840 task.stack: ffff880078090000 RIP: set_freeobj.part.43+0x1c/0x1f RSP: 0018:ffff880078093ca8 EFLAGS: 00010246 RAX: 0000000000000018 RBX: ffff880076798d88 RCX: ffffffff81c408c8 RDX: 0000000000000018 RSI: 0000000000000000 RDI: 0000000000000246 RBP: ffff880078093cb0 R08: 0000000000000000 R09: 0000000000000000 R10: ffff88005bc43030 R11: 0000000000001df3 R12: ffff880076798d88 R13: 000000000005bc43 R14: ffff88007819d1b8 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88007e380000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc934048f20 CR3: 0000000077b01000 CR4: 00000000000406e0 Call Trace: obj_malloc+0x22b/0x260 zs_malloc+0x1e4/0x580 zram_bvec_rw+0x4cd/0x830 [zram] page_requests_rw+0x9c/0x130 [zram] zram_thread+0xe6/0x173 [zram] kthread+0xca/0xe0 ret_from_fork+0x25/0x30 With investigation, it reveals currently stable page doesn't support anonymous page. IOW, reuse_swap_page can reuse the page without waiting writeback completion so it can overwrite page zram is compressing. Unfortunately, zram has used per-cpu stream feature from v4.7. It aims for increasing cache hit ratio of scratch buffer for compressing. Downside of that approach is that zram should ask memory space for compressed page in per-cpu context which requires stricted gfp flag which could be failed. If so, it retries to allocate memory space out of per-cpu context so it could get memory this time and compress the data again, copies it to the memory space. In this scenario, zram assumes the data should never be changed but it is not true unless stable page supports. So, If the data is changed under us, zram can make buffer overrun because second compression size could be bigger than one we got in previous trial and blindly, copy bigger size object to smaller buffer which is buffer overrun. The overrun breaks zsmalloc free object chaining so system goes crash like above. I think below is same problem. https://bugzilla.suse.com/show_bug.cgi?id=997574 Unfortunately, reuse_swap_page should be atomic so that we cannot wait on writeback in there so the approach in this patch is simply return false if we found it needs stable page. Although it increases memory footprint temporarily, it happens rarely and it should be reclaimed easily althoug it happened. Also, It would be better than waiting of IO completion, which is critial path for application latency. Fixes: da9556a2367c ("zram: user per-cpu compression streams") Link: http://lkml.kernel.org/r/20161120233015.GA14113@bbox Link: http://lkml.kernel.org/r/1482366980-3782-2-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Acked-by: Hugh Dickins Cc: Sergey Senozhatsky Cc: Darrick J. Wong Cc: Takashi Iwai Cc: Hyeoncheol Lee Cc: Cc: Sangseok Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/swap.h | 3 ++- mm/swapfile.c | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index a56523cefb9b..55ff5593c193 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -150,8 +150,9 @@ enum { SWP_FILE = (1 << 7), /* set after swap_activate success */ SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ + SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */ /* add others here before... */ - SWP_SCANNING = (1 << 10), /* refcount in scan_swap_map */ + SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL diff --git a/mm/swapfile.c b/mm/swapfile.c index f30438970cd1..d76b2a18f044 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -943,11 +943,25 @@ bool reuse_swap_page(struct page *page, int *total_mapcount) count = page_trans_huge_mapcount(page, total_mapcount); if (count <= 1 && PageSwapCache(page)) { count += page_swapcount(page); - if (count == 1 && !PageWriteback(page)) { + if (count != 1) + goto out; + if (!PageWriteback(page)) { delete_from_swap_cache(page); SetPageDirty(page); + } else { + swp_entry_t entry; + struct swap_info_struct *p; + + entry.val = page_private(page); + p = swap_info_get(entry); + if (p->flags & SWP_STABLE_WRITES) { + spin_unlock(&p->lock); + return false; + } + spin_unlock(&p->lock); } } +out: return count <= 1; } @@ -2449,6 +2463,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = -ENOMEM; goto bad_swap; } + + if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) + p->flags |= SWP_STABLE_WRITES; + if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { int cpu; -- GitLab From 8315c22ea879082bba365d46dd2cc7881fbfb49a Mon Sep 17 00:00:00 2001 From: John Sperbeck Date: Tue, 10 Jan 2017 16:58:24 -0800 Subject: [PATCH 0495/1442] mm/slab.c: fix SLAB freelist randomization duplicate entries commit c4e490cf148e85ead0d1b1c2caaba833f1d5b29f upstream. This patch fixes a bug in the freelist randomization code. When a high random number is used, the freelist will contain duplicate entries. It will result in different allocations sharing the same chunk. It will result in odd behaviours and crashes. It should be uncommon but it depends on the machines. We saw it happening more often on some machines (every few hours of running tests). Fixes: c7ce4f60ac19 ("mm: SLAB freelist randomization") Link: http://lkml.kernel.org/r/20170103181908.143178-1-thgarnie@google.com Signed-off-by: John Sperbeck Signed-off-by: Thomas Garnier Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slab.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 0b0550ca85b4..bd878f051a3b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2475,7 +2475,6 @@ union freelist_init_state { unsigned int pos; unsigned int *list; unsigned int count; - unsigned int rand; }; struct rnd_state rnd_state; }; @@ -2501,8 +2500,7 @@ static bool freelist_state_initialize(union freelist_init_state *state, } else { state->list = cachep->random_seq; state->count = count; - state->pos = 0; - state->rand = rand; + state->pos = rand % count; ret = true; } return ret; @@ -2511,7 +2509,9 @@ static bool freelist_state_initialize(union freelist_init_state *state, /* Get the next entry on the list and randomize it using a random shift */ static freelist_idx_t next_random_slot(union freelist_init_state *state) { - return (state->list[state->pos++] + state->rand) % state->count; + if (state->pos >= state->count) + state->pos = 0; + return state->list[state->pos++]; } /* Swap two freelist entries */ -- GitLab From 1e26cec60668091201d490dd8709352696336211 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 10 Jan 2017 16:58:27 -0800 Subject: [PATCH 0496/1442] mm/hugetlb.c: fix reservation race when freeing surplus pages commit e5bbc8a6c992901058bc09e2ce01d16c111ff047 upstream. return_unused_surplus_pages() decrements the global reservation count, and frees any unused surplus pages that were backing the reservation. Commit 7848a4bf51b3 ("mm/hugetlb.c: add cond_resched_lock() in return_unused_surplus_pages()") added a call to cond_resched_lock in the loop freeing the pages. As a result, the hugetlb_lock could be dropped, and someone else could use the pages that will be freed in subsequent iterations of the loop. This could result in inconsistent global hugetlb page state, application api failures (such as mmap) failures or application crashes. When dropping the lock in return_unused_surplus_pages, make sure that the global reservation count (resv_huge_pages) remains sufficiently large to prevent someone else from claiming pages about to be freed. Analyzed by Paul Cassella. Fixes: 7848a4bf51b3 ("mm/hugetlb.c: add cond_resched_lock() in return_unused_surplus_pages()") Link: http://lkml.kernel.org/r/1483991767-6879-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Paul Cassella Suggested-by: Michal Hocko Cc: Masayoshi Mizuma Cc: Naoya Horiguchi Cc: Aneesh Kumar Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 23aec01836aa..b6adedbafaf5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1773,23 +1773,32 @@ static int gather_surplus_pages(struct hstate *h, int delta) } /* - * When releasing a hugetlb pool reservation, any surplus pages that were - * allocated to satisfy the reservation must be explicitly freed if they were - * never used. - * Called with hugetlb_lock held. + * This routine has two main purposes: + * 1) Decrement the reservation count (resv_huge_pages) by the value passed + * in unused_resv_pages. This corresponds to the prior adjustments made + * to the associated reservation map. + * 2) Free any unused surplus pages that may have been allocated to satisfy + * the reservation. As many as unused_resv_pages may be freed. + * + * Called with hugetlb_lock held. However, the lock could be dropped (and + * reacquired) during calls to cond_resched_lock. Whenever dropping the lock, + * we must make sure nobody else can claim pages we are in the process of + * freeing. Do this by ensuring resv_huge_page always is greater than the + * number of huge pages we plan to free when dropping the lock. */ static void return_unused_surplus_pages(struct hstate *h, unsigned long unused_resv_pages) { unsigned long nr_pages; - /* Uncommit the reservation */ - h->resv_huge_pages -= unused_resv_pages; - /* Cannot return gigantic pages currently */ if (hstate_is_gigantic(h)) - return; + goto out; + /* + * Part (or even all) of the reservation could have been backed + * by pre-allocated pages. Only free surplus pages. + */ nr_pages = min(unused_resv_pages, h->surplus_huge_pages); /* @@ -1799,12 +1808,22 @@ static void return_unused_surplus_pages(struct hstate *h, * when the nodes with surplus pages have no free pages. * free_pool_huge_page() will balance the the freed pages across the * on-line nodes with memory and will handle the hstate accounting. + * + * Note that we decrement resv_huge_pages as we free the pages. If + * we drop the lock, resv_huge_pages will still be sufficiently large + * to cover subsequent pages we may free. */ while (nr_pages--) { + h->resv_huge_pages--; + unused_resv_pages--; if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1)) - break; + goto out; cond_resched_lock(&hugetlb_lock); } + +out: + /* Fully uncommit the reservation */ + h->resv_huge_pages -= unused_resv_pages; } -- GitLab From 7718ffcf9a64830bbae148432f625346cde2f2d6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Jan 2017 15:02:32 +0100 Subject: [PATCH 0497/1442] KVM: x86: fix emulation of "MOV SS, null selector" commit 33ab91103b3415e12457e3104f0e4517ce12d0f3 upstream. This is CVE-2017-2583. On Intel this causes a failed vmentry because SS's type is neither 3 nor 7 (even though the manual says this check is only done for usable SS, and the dmesg splat says that SS is unusable!). On AMD it's worse: svm.c is confused and sets CPL to 0 in the vmcb. The fix fabricates a data segment descriptor when SS is set to a null selector, so that CPL and SS.DPL are set correctly in the VMCS/vmcb. Furthermore, only allow setting SS to a NULL selector if SS.RPL < 3; this in turn ensures CPL < 3 because RPL must be equal to CPL. Thanks to Andy Lutomirski and Willy Tarreau for help in analyzing the bug and deciphering the manuals. Reported-by: Xiaohan Zhang Fixes: 79d5b4c3cd809c770d4bf9812635647016c56011 Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a3ce9d260d68..40364cd03c6b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1544,7 +1544,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, &ctxt->exception); } -/* Does not support long mode */ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, enum x86_transfer_type transfer, @@ -1581,20 +1580,34 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; - /* NULL selector is not valid for TR, CS and SS (except for long mode) */ - if ((seg == VCPU_SREG_CS - || (seg == VCPU_SREG_SS - && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)) - || seg == VCPU_SREG_TR) - && null_selector) - goto exception; - /* TR should be in GDT only */ if (seg == VCPU_SREG_TR && (selector & (1 << 2))) goto exception; - if (null_selector) /* for NULL selector skip all following checks */ + /* NULL selector is not valid for TR, CS and (except for long mode) SS */ + if (null_selector) { + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR) + goto exception; + + if (seg == VCPU_SREG_SS) { + if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl) + goto exception; + + /* + * ctxt->ops->set_segment expects the CPL to be in + * SS.DPL, so fake an expand-up 32-bit data segment. + */ + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = cpl; + seg_desc.d = 1; + seg_desc.g = 1; + } + + /* Skip all following checks */ goto load; + } ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr); if (ret != X86EMUL_CONTINUE) @@ -1710,6 +1723,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg) { u8 cpl = ctxt->ops->cpl(ctxt); + + /* + * None of MOV, POP and LSS can load a NULL selector in CPL=3, but + * they can load it at CPL<3 (Intel's manual says only LSS can, + * but it's wrong). + * + * However, the Intel manual says that putting IST=1/DPL=3 in + * an interrupt gate will result in SS=3 (the AMD manual instead + * says it doesn't), so allow SS=3 in __load_segment_descriptor + * and only forbid it here. + */ + if (seg == VCPU_SREG_SS && selector == 3 && + ctxt->mode == X86EMUL_MODE_PROT64) + return emulate_exception(ctxt, GP_VECTOR, 0, true); + return __load_segment_descriptor(ctxt, selector, seg, cpl, X86_TRANSFER_NONE, NULL); } -- GitLab From 7caf473f99b8c3537cc2196d3d508dd6c139048b Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 5 Jan 2017 17:39:42 -0800 Subject: [PATCH 0498/1442] KVM: eventfd: fix NULL deref irqbypass consumer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4f3dbdf47e150016aacd734e663347fcaa768303 upstream. Reported syzkaller: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] PGD 0 Oops: 0002 [#1] SMP CPU: 1 PID: 125 Comm: kworker/1:1 Not tainted 4.9.0+ #1 Workqueue: kvm-irqfd-cleanup irqfd_shutdown [kvm] task: ffff9bbe0dfbb900 task.stack: ffffb61802014000 RIP: 0010:irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] Call Trace: irqfd_shutdown+0x66/0xa0 [kvm] process_one_work+0x16b/0x480 worker_thread+0x4b/0x500 kthread+0x101/0x140 ? process_one_work+0x480/0x480 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x25/0x30 RIP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] RSP: ffffb61802017e20 CR2: 0000000000000008 The syzkaller folks reported a NULL pointer dereference that due to unregister an consumer which fails registration before. The syzkaller creates two VMs w/ an equal eventfd occasionally. So the second VM fails to register an irqbypass consumer. It will make irqfd as inactive and queue an workqueue work to shutdown irqfd and unregister the irqbypass consumer when eventfd is closed. However, the second consumer has been initialized though it fails registration. So the token(same as the first VM's) is taken to unregister the consumer through the workqueue, the consumer of the first VM is found and unregistered, then NULL deref incurred in the path of deleting consumer from the consumers list. This patch fixes it by making irq_bypass_register/unregister_consumer() looks for the consumer entry based on consumer pointer itself instead of token matching. Reported-by: Dmitry Vyukov Suggested-by: Alex Williamson Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: Alex Williamson Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/lib/irqbypass.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c index 52abac4bb6a2..6d2fcd6fcb25 100644 --- a/virt/lib/irqbypass.c +++ b/virt/lib/irqbypass.c @@ -195,7 +195,7 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) mutex_lock(&lock); list_for_each_entry(tmp, &consumers, node) { - if (tmp->token == consumer->token) { + if (tmp->token == consumer->token || tmp == consumer) { mutex_unlock(&lock); module_put(THIS_MODULE); return -EBUSY; @@ -245,7 +245,7 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) mutex_lock(&lock); list_for_each_entry(tmp, &consumers, node) { - if (tmp->token != consumer->token) + if (tmp != consumer) continue; list_for_each_entry(producer, &producers, node) { -- GitLab From 483ecebb22c16369ecb607e4bfd5f9767b56555f Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 16 Dec 2016 14:30:35 -0800 Subject: [PATCH 0499/1442] jump_labels: API for flushing deferred jump label updates commit b6416e61012429e0277bd15a229222fd17afc1c1 upstream. Modules that use static_key_deferred need a way to synchronize with any delayed work that is still pending when the module is unloaded. Introduce static_key_deferred_flush() which flushes any pending jump label updates. Signed-off-by: David Matlack Acked-by: Peter Zijlstra (Intel) Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- include/linux/jump_label_ratelimit.h | 5 +++++ kernel/jump_label.c | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index 089f70f83e97..23da3af459fe 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -14,6 +14,7 @@ struct static_key_deferred { #ifdef HAVE_JUMP_LABEL extern void static_key_slow_dec_deferred(struct static_key_deferred *key); +extern void static_key_deferred_flush(struct static_key_deferred *key); extern void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); @@ -26,6 +27,10 @@ static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) STATIC_KEY_CHECK_USE(); static_key_slow_dec(&key->key); } +static inline void static_key_deferred_flush(struct static_key_deferred *key) +{ + STATIC_KEY_CHECK_USE(); +} static inline void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 93ad6c1fb9b6..a9b8cf500591 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -182,6 +182,13 @@ void static_key_slow_dec_deferred(struct static_key_deferred *key) } EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); +void static_key_deferred_flush(struct static_key_deferred *key) +{ + STATIC_KEY_CHECK_USE(); + flush_delayed_work(&key->work); +} +EXPORT_SYMBOL_GPL(static_key_deferred_flush); + void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { -- GitLab From 5ed21cc0cf2650756167dfd492799e214a69384e Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 16 Dec 2016 14:30:36 -0800 Subject: [PATCH 0500/1442] KVM: x86: flush pending lapic jump label updates on module unload commit cef84c302fe051744b983a92764d3fcca933415d upstream. KVM's lapic emulation uses static_key_deferred (apic_{hw,sw}_disabled). These are implemented with delayed_work structs which can still be pending when the KVM module is unloaded. We've seen this cause kernel panics when the kvm_intel module is quickly reloaded. Use the new static_key_deferred_flush() API to flush pending updates on module unload. Signed-off-by: David Matlack Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 6 ++++++ arch/x86/kvm/lapic.h | 1 + arch/x86/kvm/x86.c | 1 + 3 files changed, 8 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6f69340f9fa3..3f05c044720b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2360,3 +2360,9 @@ void kvm_lapic_init(void) jump_label_rate_limit(&apic_hw_disabled, HZ); jump_label_rate_limit(&apic_sw_disabled, HZ); } + +void kvm_lapic_exit(void) +{ + static_key_deferred_flush(&apic_hw_disabled); + static_key_deferred_flush(&apic_sw_disabled); +} diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index f60d01c29d51..4dfe4d6cb338 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -108,6 +108,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); void kvm_lapic_init(void); +void kvm_lapic_exit(void); #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f3648c978d2f..626466988288 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5963,6 +5963,7 @@ int kvm_arch_init(void *opaque) void kvm_arch_exit(void) { + kvm_lapic_exit(); perf_unregister_guest_info_callbacks(&kvm_guest_cbs); if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) -- GitLab From 90f70fcd6f72e5d7797866f8bae78ac504b9de31 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 3 Jan 2017 18:56:19 -0800 Subject: [PATCH 0501/1442] KVM: x86: fix NULL deref in vcpu_scan_ioapic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 546d87e5c903a7f3ee7b9f998949a94729fbc65b upstream. Reported by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 00000000000001b0 IP: _raw_spin_lock+0xc/0x30 PGD 3e28eb067 PUD 3f0ac6067 PMD 0 Oops: 0002 [#1] SMP CPU: 0 PID: 2431 Comm: test Tainted: G OE 4.10.0-rc1+ #3 Call Trace: ? kvm_ioapic_scan_entry+0x3e/0x110 [kvm] kvm_arch_vcpu_ioctl_run+0x10a8/0x15f0 [kvm] ? pick_next_task_fair+0xe1/0x4e0 ? kvm_arch_vcpu_load+0xea/0x260 [kvm] kvm_vcpu_ioctl+0x33a/0x600 [kvm] ? hrtimer_try_to_cancel+0x29/0x130 ? do_nanosleep+0x97/0xf0 do_vfs_ioctl+0xa1/0x5d0 ? __hrtimer_init+0x90/0x90 ? do_nanosleep+0x5b/0xf0 SyS_ioctl+0x79/0x90 do_syscall_64+0x6e/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: _raw_spin_lock+0xc/0x30 RSP: ffffa43688973cc0 The syzkaller folks reported a NULL pointer dereference due to ENABLE_CAP succeeding even without an irqchip. The Hyper-V synthetic interrupt controller is activated, resulting in a wrong request to rescan the ioapic and a NULL pointer dereference. #include #include #include #include #include #include #include #include #include #include #ifndef KVM_CAP_HYPERV_SYNIC #define KVM_CAP_HYPERV_SYNIC 123 #endif void* thr(void* arg) { struct kvm_enable_cap cap; cap.flags = 0; cap.cap = KVM_CAP_HYPERV_SYNIC; ioctl((long)arg, KVM_ENABLE_CAP, &cap); return 0; } int main() { void *host_mem = mmap(0, 0x1000, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); int kvmfd = open("/dev/kvm", 0); int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); struct kvm_userspace_memory_region memreg; memreg.slot = 0; memreg.flags = 0; memreg.guest_phys_addr = 0; memreg.memory_size = 0x1000; memreg.userspace_addr = (unsigned long)host_mem; host_mem[0] = 0xf4; ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg); int cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); struct kvm_sregs sregs; ioctl(cpufd, KVM_GET_SREGS, &sregs); sregs.cr0 = 0; sregs.cr4 = 0; sregs.efer = 0; sregs.cs.selector = 0; sregs.cs.base = 0; ioctl(cpufd, KVM_SET_SREGS, &sregs); struct kvm_regs regs = { .rflags = 2 }; ioctl(cpufd, KVM_SET_REGS, ®s); ioctl(vmfd, KVM_CREATE_IRQCHIP, 0); pthread_t th; pthread_create(&th, 0, thr, (void*)(long)cpufd); usleep(rand() % 10000); ioctl(cpufd, KVM_RUN, 0); pthread_join(th, 0); return 0; } This patch fixes it by failing ENABLE_CAP if without an irqchip. Reported-by: Dmitry Vyukov Fixes: 5c919412fe61 (kvm/x86: Hyper-V synthetic interrupt controller) Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 626466988288..487b957e7802 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3308,6 +3308,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, switch (cap->cap) { case KVM_CAP_HYPERV_SYNIC: + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; return kvm_hv_activate_synic(vcpu); default: return -EINVAL; -- GitLab From bc5e1316efd6758beae4e7fe17daaef8dcd43ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 8 Nov 2016 20:54:16 +0100 Subject: [PATCH 0502/1442] KVM: x86: add Align16 instruction flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d3fe959f81024072068e9ed86b39c2acfd7462a9 upstream. Needed for FXSAVE and FXRSTOR. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 40364cd03c6b..7a6f9fa20485 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -171,6 +171,7 @@ #define NearBranch ((u64)1 << 52) /* Near branches */ #define No16 ((u64)1 << 53) /* No 16 bit operand */ #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ +#define Aligned16 ((u64)1 << 55) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -632,21 +633,24 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, * depending on whether they're AVX encoded or not. * * Also included is CMPXCHG16B which is not a vector instruction, yet it is - * subject to the same check. + * subject to the same check. FXSAVE and FXRSTOR are checked here too as their + * 512 bytes of data must be aligned to a 16 byte boundary. */ -static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size) +static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size) { if (likely(size < 16)) - return false; + return 1; if (ctxt->d & Aligned) - return true; + return size; else if (ctxt->d & Unaligned) - return false; + return 1; else if (ctxt->d & Avx) - return false; + return 1; + else if (ctxt->d & Aligned16) + return 16; else - return true; + return size; } static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, @@ -704,7 +708,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, } break; } - if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) + if (la & (insn_alignment(ctxt, size) - 1)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; bad: -- GitLab From aae8f3464b1f4daee192b59f5d56d31ff2098b68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 8 Nov 2016 20:54:18 +0100 Subject: [PATCH 0503/1442] KVM: x86: add asm_safe wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit aabba3c6abd50b05b1fc2c6ec44244aa6bcda576 upstream. Move the existing exception handling for inline assembly into a macro and switch its return values to X86EMUL type. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7a6f9fa20485..a1427f740998 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -447,6 +447,26 @@ FOP_END; FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET FOP_END; +/* + * XXX: inoutclob user must know where the argument is being expanded. + * Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault. + */ +#define asm_safe(insn, inoutclob...) \ +({ \ + int _fault = 0; \ + \ + asm volatile("1:" insn "\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: movl $1, %[_fault]\n" \ + " jmp 2b\n" \ + ".popsection\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [_fault] "+qm"(_fault) inoutclob ); \ + \ + _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \ +}) + static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, enum x86_intercept intercept, enum x86_intercept_stage stage) @@ -5098,21 +5118,13 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { - bool fault = false; + int rc; ctxt->ops->get_fpu(ctxt); - asm volatile("1: fwait \n\t" - "2: \n\t" - ".pushsection .fixup,\"ax\" \n\t" - "3: \n\t" - "movb $1, %[fault] \n\t" - "jmp 2b \n\t" - ".popsection \n\t" - _ASM_EXTABLE(1b, 3b) - : [fault]"+qm"(fault)); + rc = asm_safe("fwait"); ctxt->ops->put_fpu(ctxt); - if (unlikely(fault)) + if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); return X86EMUL_CONTINUE; -- GitLab From 83fedbb76051003fe734653852efc173ae04cb87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 9 Nov 2016 19:07:06 +0100 Subject: [PATCH 0504/1442] KVM: x86: emulate FXSAVE and FXRSTOR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 283c95d0e3891b64087706b344a4b545d04a6e62 upstream. Internal errors were reported on 16 bit fxsave and fxrstor with ipxe. Old Intels don't have unrestricted_guest, so we have to emulate them. The patch takes advantage of the hardware implementation. AMD and Intel differ in saving and restoring other fields in first 32 bytes. A test wrote 0xff to the fxsave area, 0 to upper bits of MCSXR in the fxsave area, executed fxrstor, rewrote the fxsave area to 0xee, and executed fxsave: Intel (Nehalem): 7f 1f 7f 7f ff 00 ff 07 ff ff ff ff ff ff 00 00 ff ff ff ff ff ff 00 00 ff ff 00 00 ff ff 00 00 Intel (Haswell -- deprecated FPU CS and FPU DS): 7f 1f 7f 7f ff 00 ff 07 ff ff ff ff 00 00 00 00 ff ff ff ff 00 00 00 00 ff ff 00 00 ff ff 00 00 AMD (Opteron 2300-series): 7f 1f 7f 7f ff 00 ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee ff ff 00 00 ff ff 02 00 fxsave/fxrstor will only be emulated on early Intels, so KVM can't do much to improve the situation. Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 129 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a1427f740998..8994d23bc45d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3894,6 +3894,131 @@ static int em_movsxd(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int check_fxsr(struct x86_emulate_ctxt *ctxt) +{ + u32 eax = 1, ebx, ecx = 0, edx; + + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); + if (!(edx & FFL(FXSR))) + return emulate_ud(ctxt); + + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + + /* + * Don't emulate a case that should never be hit, instead of working + * around a lack of fxsave64/fxrstor64 on old compilers. + */ + if (ctxt->mode >= X86EMUL_MODE_PROT64) + return X86EMUL_UNHANDLEABLE; + + return X86EMUL_CONTINUE; +} + +/* + * FXSAVE and FXRSTOR have 4 different formats depending on execution mode, + * 1) 16 bit mode + * 2) 32 bit mode + * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs + * preserve whole 32 bit values, though, so (1) and (2) are the same wrt. + * save and restore + * 3) 64-bit mode with REX.W prefix + * - like (2), but XMM 8-15 are being saved and restored + * 4) 64-bit mode without REX.W prefix + * - like (3), but FIP and FDP are 64 bit + * + * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the + * desired result. (4) is not emulated. + * + * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS + * and FPU DS) should match. + */ +static int em_fxsave(struct x86_emulate_ctxt *ctxt) +{ + struct fxregs_state fx_state; + size_t size; + int rc; + + rc = check_fxsr(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + ctxt->ops->get_fpu(ctxt); + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + + ctxt->ops->put_fpu(ctxt); + + if (rc != X86EMUL_CONTINUE) + return rc; + + if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR) + size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]); + else + size = offsetof(struct fxregs_state, xmm_space[0]); + + return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size); +} + +static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt, + struct fxregs_state *new) +{ + int rc = X86EMUL_CONTINUE; + struct fxregs_state old; + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old)); + if (rc != X86EMUL_CONTINUE) + return rc; + + /* + * 64 bit host will restore XMM 8-15, which is not correct on non-64 + * bit guests. Load the current values in order to preserve 64 bit + * XMMs after fxrstor. + */ +#ifdef CONFIG_X86_64 + /* XXX: accessing XMM 8-15 very awkwardly */ + memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16); +#endif + + /* + * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but + * does save and restore MXCSR. + */ + if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) + memcpy(new->xmm_space, old.xmm_space, 8 * 16); + + return rc; +} + +static int em_fxrstor(struct x86_emulate_ctxt *ctxt) +{ + struct fxregs_state fx_state; + int rc; + + rc = check_fxsr(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + + rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512); + if (rc != X86EMUL_CONTINUE) + return rc; + + if (fx_state.mxcsr >> 16) + return emulate_gp(ctxt, 0); + + ctxt->ops->get_fpu(ctxt); + + if (ctxt->mode < X86EMUL_MODE_PROT64) + rc = fxrstor_fixup(ctxt, &fx_state); + + if (rc == X86EMUL_CONTINUE) + rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); + + ctxt->ops->put_fpu(ctxt); + + return rc; +} + static bool valid_cr(int nr) { switch (nr) { @@ -4246,7 +4371,9 @@ static const struct gprefix pfx_0f_ae_7 = { }; static const struct group_dual group15 = { { - N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7), + I(ModRM | Aligned16, em_fxsave), + I(ModRM | Aligned16, em_fxrstor), + N, N, N, N, N, GP(0, &pfx_0f_ae_7), }, { N, N, N, N, N, N, N, N, } }; -- GitLab From 736e77c07fba8b49cead504b885a82ce52c0ff10 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 11 Jan 2017 18:28:29 -0800 Subject: [PATCH 0505/1442] KVM: x86: Introduce segmented_write_std commit 129a72a0d3c8e139a04512325384fe5ac119e74d upstream. Introduces segemented_write_std. Switches from emulated reads/writes to standard read/writes in fxsave, fxrstor, sgdt, and sidt. This fixes CVE-2017-2584, a longstanding kernel memory leak. Since commit 283c95d0e389 ("KVM: x86: emulate FXSAVE and FXRSTOR", 2016-11-09), which is luckily not yet in any final release, this would also be an exploitable kernel memory *write*! Reported-by: Dmitry Vyukov Fixes: 96051572c819194c37a8367624b285be10297eca Fixes: 283c95d0e3891b64087706b344a4b545d04a6e62 Suggested-by: Paolo Bonzini Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8994d23bc45d..9f676adcdfc2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -815,6 +815,20 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } +static int segmented_write_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned int size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); +} + /* * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. @@ -3710,8 +3724,8 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt, } /* Disable writeback. */ ctxt->dst.type = OP_NONE; - return segmented_write(ctxt, ctxt->dst.addr.mem, - &desc_ptr, 2 + ctxt->op_bytes); + return segmented_write_std(ctxt, ctxt->dst.addr.mem, + &desc_ptr, 2 + ctxt->op_bytes); } static int em_sgdt(struct x86_emulate_ctxt *ctxt) @@ -3957,7 +3971,7 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) else size = offsetof(struct fxregs_state, xmm_space[0]); - return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size); + return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); } static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt, @@ -3999,7 +4013,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512); if (rc != X86EMUL_CONTINUE) return rc; -- GitLab From 74ce3fd64bc44f89856ff57bf684882dc098f93b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 24 Dec 2016 13:59:23 +0000 Subject: [PATCH 0506/1442] efi/libstub/arm*: Pass latest memory map to the kernel commit abfb7b686a3e5be27bf81db62f9c5c895b76f5d1 upstream. As reported by James Morse, the current libstub code involving the annotated memory map only works somewhat correctly by accident, due to the fact that a pool allocation happens to be reused immediately, retaining its former contents on most implementations of the UEFI boot services. Instead of juggling memory maps, which makes the code more complex than it needs to be, simply put placeholder values into the FDT for the memory map parameters, and only write the actual values after ExitBootServices() has been called. Reported-by: James Morse Signed-off-by: Ard Biesheuvel Cc: Jeffrey Hugo Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Fixes: ed9cc156c42f ("efi/libstub: Use efi_exit_boot_services() in FDT") Link: http://lkml.kernel.org/r/1482587963-20183-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/efistub.h | 8 --- drivers/firmware/efi/libstub/fdt.c | 87 +++++++++++++++++--------- 2 files changed, 56 insertions(+), 39 deletions(-) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index ee49cd23ee63..fac67992bede 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -30,14 +30,6 @@ efi_status_t efi_file_close(void *handle); unsigned long get_dram_base(efi_system_table_t *sys_table_arg); -efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, - unsigned long orig_fdt_size, - void *fdt, int new_fdt_size, char *cmdline_ptr, - u64 initrd_addr, u64 initrd_size, - efi_memory_desc_t *memory_map, - unsigned long map_size, unsigned long desc_size, - u32 desc_ver); - efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, void *handle, unsigned long *new_fdt_addr, diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index a6a93116a8f0..921dfa047202 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -16,13 +16,10 @@ #include "efistub.h" -efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, - unsigned long orig_fdt_size, - void *fdt, int new_fdt_size, char *cmdline_ptr, - u64 initrd_addr, u64 initrd_size, - efi_memory_desc_t *memory_map, - unsigned long map_size, unsigned long desc_size, - u32 desc_ver) +static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, + unsigned long orig_fdt_size, + void *fdt, int new_fdt_size, char *cmdline_ptr, + u64 initrd_addr, u64 initrd_size) { int node, num_rsv; int status; @@ -101,25 +98,23 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, if (status) goto fdt_set_fail; - fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map); + fdt_val64 = U64_MAX; /* placeholder */ status = fdt_setprop(fdt, node, "linux,uefi-mmap-start", &fdt_val64, sizeof(fdt_val64)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(map_size); + fdt_val32 = U32_MAX; /* placeholder */ status = fdt_setprop(fdt, node, "linux,uefi-mmap-size", &fdt_val32, sizeof(fdt_val32)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(desc_size); status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-size", &fdt_val32, sizeof(fdt_val32)); if (status) goto fdt_set_fail; - fdt_val32 = cpu_to_fdt32(desc_ver); status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-ver", &fdt_val32, sizeof(fdt_val32)); if (status) @@ -148,6 +143,43 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, return EFI_LOAD_ERROR; } +static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map) +{ + int node = fdt_path_offset(fdt, "/chosen"); + u64 fdt_val64; + u32 fdt_val32; + int err; + + if (node < 0) + return EFI_LOAD_ERROR; + + fdt_val64 = cpu_to_fdt64((unsigned long)*map->map); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-start", + &fdt_val64, sizeof(fdt_val64)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->map_size); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-size", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->desc_size); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-size", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + fdt_val32 = cpu_to_fdt32(*map->desc_ver); + err = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-desc-ver", + &fdt_val32, sizeof(fdt_val32)); + if (err) + return EFI_LOAD_ERROR; + + return EFI_SUCCESS; +} + #ifndef EFI_FDT_ALIGN #define EFI_FDT_ALIGN EFI_PAGE_SIZE #endif @@ -243,20 +275,10 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, goto fail; } - /* - * Now that we have done our final memory allocation (and free) - * we can get the memory map key needed for - * exit_boot_services(). - */ - status = efi_get_memory_map(sys_table, &map); - if (status != EFI_SUCCESS) - goto fail_free_new_fdt; - status = update_fdt(sys_table, (void *)fdt_addr, fdt_size, (void *)*new_fdt_addr, new_fdt_size, - cmdline_ptr, initrd_addr, initrd_size, - memory_map, map_size, desc_size, desc_ver); + cmdline_ptr, initrd_addr, initrd_size); /* Succeeding the first time is the expected case. */ if (status == EFI_SUCCESS) @@ -266,20 +288,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, /* * We need to allocate more space for the new * device tree, so free existing buffer that is - * too small. Also free memory map, as we will need - * to get new one that reflects the free/alloc we do - * on the device tree buffer. + * too small. */ efi_free(sys_table, new_fdt_size, *new_fdt_addr); - sys_table->boottime->free_pool(memory_map); new_fdt_size += EFI_PAGE_SIZE; } else { pr_efi_err(sys_table, "Unable to construct new device tree.\n"); - goto fail_free_mmap; + goto fail_free_new_fdt; } } - sys_table->boottime->free_pool(memory_map); priv.runtime_map = runtime_map; priv.runtime_entry_count = &runtime_entry_count; status = efi_exit_boot_services(sys_table, handle, &map, &priv, @@ -288,6 +306,16 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; + status = update_fdt_memmap((void *)*new_fdt_addr, &map); + if (status != EFI_SUCCESS) { + /* + * The kernel won't get far without the memory map, but + * may still be able to print something meaningful so + * return success here. + */ + return EFI_SUCCESS; + } + /* Install the new virtual address map */ svam = sys_table->runtime->set_virtual_address_map; status = svam(runtime_entry_count * desc_size, desc_size, @@ -319,9 +347,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi_err(sys_table, "Exit boot services failed.\n"); -fail_free_mmap: - sys_table->boottime->free_pool(memory_map); - fail_free_new_fdt: efi_free(sys_table, new_fdt_size, *new_fdt_addr); -- GitLab From 99b17ac0014be19906669dd51d26a78d14363d1f Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 12 Dec 2016 18:42:28 -0500 Subject: [PATCH 0507/1442] efi/x86: Prune invalid memory map entries and fix boot regression commit 0100a3e67a9cef64d72cd3a1da86f3ddbee50363 upstream. Some machines, such as the Lenovo ThinkPad W541 with firmware GNET80WW (2.28), include memory map entries with phys_addr=0x0 and num_pages=0. These machines fail to boot after the following commit, commit 8e80632fb23f ("efi/esrt: Use efi_mem_reserve() and avoid a kmalloc()") Fix this by removing such bogus entries from the memory map. Furthermore, currently the log output for this case (with efi=debug) looks like: [ 0.000000] efi: mem45: [Reserved | | | | | | | | | | | | ] range=[0x0000000000000000-0xffffffffffffffff] (0MB) This is clearly wrong, and also not as informative as it could be. This patch changes it so that if we find obviously invalid memory map entries, we print an error and skip those entries. It also detects the display of the address range calculation overflow, so the new output is: [ 0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries: [ 0.000000] efi: mem45: [Reserved | | | | | | | | | | | | ] range=[0x0000000000000000-0x0000000000000000] (invalid) It also detects memory map sizes that would overflow the physical address, for example phys_addr=0xfffffffffffff000 and num_pages=0x0200000000000001, and prints: [ 0.000000] efi: [Firmware Bug]: Invalid EFI memory map entries: [ 0.000000] efi: mem45: [Reserved | | | | | | | | | | | | ] range=[phys_addr=0xfffffffffffff000-0x20ffffffffffffffff] (invalid) It then removes these entries from the memory map. Signed-off-by: Peter Jones Signed-off-by: Ard Biesheuvel [ardb: refactor for clarity with no functional changes, avoid PAGE_SHIFT] Signed-off-by: Matt Fleming [Matt: Include bugzilla info in commit log] Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://bugzilla.kernel.org/show_bug.cgi?id=191121 Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/efi/efi.c | 66 +++++++++++++++++++++++++++++++++++++ include/linux/efi.h | 1 + 2 files changed, 67 insertions(+) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 936a488d6cf6..274dfc481849 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -210,6 +210,70 @@ int __init efi_memblock_x86_reserve_range(void) return 0; } +#define OVERFLOW_ADDR_SHIFT (64 - EFI_PAGE_SHIFT) +#define OVERFLOW_ADDR_MASK (U64_MAX << OVERFLOW_ADDR_SHIFT) +#define U64_HIGH_BIT (~(U64_MAX >> 1)) + +static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i) +{ + u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1; + u64 end_hi = 0; + char buf[64]; + + if (md->num_pages == 0) { + end = 0; + } else if (md->num_pages > EFI_PAGES_MAX || + EFI_PAGES_MAX - md->num_pages < + (md->phys_addr >> EFI_PAGE_SHIFT)) { + end_hi = (md->num_pages & OVERFLOW_ADDR_MASK) + >> OVERFLOW_ADDR_SHIFT; + + if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT)) + end_hi += 1; + } else { + return true; + } + + pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n"); + + if (end_hi) { + pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end_hi, end); + } else { + pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end); + } + return false; +} + +static void __init efi_clean_memmap(void) +{ + efi_memory_desc_t *out = efi.memmap.map; + const efi_memory_desc_t *in = out; + const efi_memory_desc_t *end = efi.memmap.map_end; + int i, n_removal; + + for (i = n_removal = 0; in < end; i++) { + if (efi_memmap_entry_valid(in, i)) { + if (out != in) + memcpy(out, in, efi.memmap.desc_size); + out = (void *)out + efi.memmap.desc_size; + } else { + n_removal++; + } + in = (void *)in + efi.memmap.desc_size; + } + + if (n_removal > 0) { + u64 size = efi.memmap.nr_map - n_removal; + + pr_warn("Removing %d invalid memory map entries.\n", n_removal); + efi_memmap_install(efi.memmap.phys_map, size); + } +} + void __init efi_print_memmap(void) { efi_memory_desc_t *md; @@ -472,6 +536,8 @@ void __init efi_init(void) } } + efi_clean_memmap(); + if (efi_enabled(EFI_DBG)) efi_print_memmap(); } diff --git a/include/linux/efi.h b/include/linux/efi.h index 2d089487d2da..fda79cdf9f10 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -103,6 +103,7 @@ typedef struct { #define EFI_PAGE_SHIFT 12 #define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) +#define EFI_PAGES_MAX (U64_MAX >> EFI_PAGE_SHIFT) typedef struct { u32 type; -- GitLab From 14d6c966744debbafd2f2815e052f2fed1dd154b Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Thu, 5 Jan 2017 13:51:29 +0100 Subject: [PATCH 0508/1442] x86/efi: Don't allocate memmap through memblock after mm_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 20b1e22d01a4b0b11d3a1066e9feb04be38607ec upstream. With the following commit: 4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data") ... efi_bgrt_init() calls into the memblock allocator through efi_mem_reserve() => efi_arch_mem_reserve() *after* mm_init() has been called. Indeed, KASAN reports a bad read access later on in efi_free_boot_services(): BUG: KASAN: use-after-free in efi_free_boot_services+0xae/0x24c at addr ffff88022de12740 Read of size 4 by task swapper/0/0 page:ffffea0008b78480 count:0 mapcount:-127 mapping: (null) index:0x1 flags: 0x5fff8000000000() [...] Call Trace: dump_stack+0x68/0x9f kasan_report_error+0x4c8/0x500 kasan_report+0x58/0x60 __asan_load4+0x61/0x80 efi_free_boot_services+0xae/0x24c start_kernel+0x527/0x562 x86_64_start_reservations+0x24/0x26 x86_64_start_kernel+0x157/0x17a start_cpu+0x5/0x14 The instruction at the given address is the first read from the memmap's memory, i.e. the read of md->type in efi_free_boot_services(). Note that the writes earlier in efi_arch_mem_reserve() don't splat because they're done through early_memremap()ed addresses. So, after memblock is gone, allocations should be done through the "normal" page allocator. Introduce a helper, efi_memmap_alloc() for this. Use it from efi_arch_mem_reserve(), efi_free_boot_services() and, for the sake of consistency, from efi_fake_memmap() as well. Note that for the latter, the memmap allocations cease to be page aligned. This isn't needed though. Tested-by: Dan Williams Signed-off-by: Nicolai Stange Reviewed-by: Ard Biesheuvel Cc: Dave Young Cc: Linus Torvalds Cc: Matt Fleming Cc: Mika Penttilä Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 4bc9f92e64c8 ("x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data") Link: http://lkml.kernel.org/r/20170105125130.2815-1-nicstange@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/efi/quirks.c | 4 ++-- drivers/firmware/efi/fake_mem.c | 3 +-- drivers/firmware/efi/memmap.c | 38 +++++++++++++++++++++++++++++++++ include/linux/efi.h | 1 + 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 10aca63a50d7..30031d5293c4 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -214,7 +214,7 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) new_size = efi.memmap.desc_size * num_entries; - new_phys = memblock_alloc(new_size, 0); + new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { pr_err("Could not allocate boot services memmap\n"); return; @@ -355,7 +355,7 @@ void __init efi_free_boot_services(void) } new_size = efi.memmap.desc_size * num_entries; - new_phys = memblock_alloc(new_size, 0); + new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { pr_err("Failed to allocate new EFI memmap\n"); return; diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 520a40e5e0e4..6c7d60c239b5 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -71,8 +71,7 @@ void __init efi_fake_memmap(void) } /* allocate memory for new EFI memmap */ - new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map, - PAGE_SIZE); + new_memmap_phy = efi_memmap_alloc(new_nr_map); if (!new_memmap_phy) return; diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index f03ddecd232b..78686443cb37 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -9,6 +9,44 @@ #include #include #include +#include +#include + +static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size) +{ + return memblock_alloc(size, 0); +} + +static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) +{ + unsigned int order = get_order(size); + struct page *p = alloc_pages(GFP_KERNEL, order); + + if (!p) + return 0; + + return PFN_PHYS(page_to_pfn(p)); +} + +/** + * efi_memmap_alloc - Allocate memory for the EFI memory map + * @num_entries: Number of entries in the allocated map. + * + * Depending on whether mm_init() has already been invoked or not, + * either memblock or "normal" page allocation is used. + * + * Returns the physical address of the allocated memory map on + * success, zero on failure. + */ +phys_addr_t __init efi_memmap_alloc(unsigned int num_entries) +{ + unsigned long size = num_entries * efi.memmap.desc_size; + + if (slab_is_available()) + return __efi_memmap_alloc_late(size); + + return __efi_memmap_alloc_early(size); +} /** * __efi_memmap_init - Common code for mapping the EFI memory map diff --git a/include/linux/efi.h b/include/linux/efi.h index fda79cdf9f10..cba7177cbec7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -931,6 +931,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); +extern phys_addr_t __init efi_memmap_alloc(unsigned int num_entries); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); -- GitLab From 0a28f5393689576a7667a7ef42cb79eafe16b019 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Jan 2017 10:57:14 +0100 Subject: [PATCH 0509/1442] nl80211: fix sched scan netlink socket owner destruction commit 753aacfd2e95df6a0caf23c03dc309020765bea9 upstream. A single netlink socket might own multiple interfaces *and* a scheduled scan request (which might belong to another interface), so when it goes away both may need to be destroyed. Remove the schedule_scan_stop indirection to fix this - it's only needed for interface destruction because of the way this works right now, with a single work taking care of all interfaces. Fixes: 93a1e86ce10e4 ("nl80211: Stop scheduled scan if netlink client disappears") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a2dd6edaae37..1b3c18c2c1ec 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14402,13 +14402,17 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { bool schedule_destroy_work = false; - bool schedule_scan_stop = false; struct cfg80211_sched_scan_request *sched_scan_req = rcu_dereference(rdev->sched_scan_req); if (sched_scan_req && notify->portid && - sched_scan_req->owner_nlportid == notify->portid) - schedule_scan_stop = true; + sched_scan_req->owner_nlportid == notify->portid) { + sched_scan_req->owner_nlportid = 0; + + if (rdev->ops->sched_scan_stop && + rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + schedule_work(&rdev->sched_scan_stop_wk); + } list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); @@ -14439,12 +14443,6 @@ static int nl80211_netlink_notify(struct notifier_block * nb, spin_unlock(&rdev->destroy_list_lock); schedule_work(&rdev->destroy_work); } - } else if (schedule_scan_stop) { - sched_scan_req->owner_nlportid = 0; - - if (rdev->ops->sched_scan_stop && - rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) - schedule_work(&rdev->sched_scan_stop_wk); } } -- GitLab From 86673e9331c929f43ff4c89bee056273ac2e3ed5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 19 Dec 2016 18:29:23 +0100 Subject: [PATCH 0510/1442] gpio: Move freeing of GPIO hogs before numbing of the device commit 5018ada69a04c8ac21d74bd682fceb8e42dc0f96 upstream. When removing a gpiochip that uses GPIO hogging (e.g. by unloading the chip's DT overlay), a warning is printed: gpio gpiochip8: REMOVING GPIOCHIP WITH GPIOS STILL REQUESTED This happens because gpiochip_free_hogs() is called after the gdev->chip pointer is reset to NULL. Hence __gpiod_free() cannot determine the chip in use, and cannot clear flags nor call the optional chip-specific .free() callback. Move the call to gpiochip_free_hogs() up to fix this. Fixes: ff2b135922992756 ("gpio: make the gpiochip a real device") Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 90621fb93941..92159313361b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1317,12 +1317,12 @@ void gpiochip_remove(struct gpio_chip *chip) /* FIXME: should the legacy sysfs handling be moved to gpio_device? */ gpiochip_sysfs_unregister(gdev); + gpiochip_free_hogs(chip); /* Numb the device, cancelling all outstanding operations */ gdev->chip = NULL; gpiochip_irqchip_remove(chip); acpi_gpiochip_remove(chip); gpiochip_remove_pin_ranges(chip); - gpiochip_free_hogs(chip); of_gpiochip_remove(chip); /* * We accept no more calls into the driver from this point, so -- GitLab From 6ba35da690f30af09706095b914d8031902fd3e5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Jan 2017 10:20:04 -0800 Subject: [PATCH 0511/1442] xfs: Timely free truncated dirty pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0a417b8dc1f10b03e8f558b8a831f07ec4c23795 upstream. Commit 99579ccec4e2 "xfs: skip dirty pages in ->releasepage()" started to skip dirty pages in xfs_vm_releasepage() which also has the effect that if a dirty page is truncated, it does not get freed by block_invalidatepage() and is lingering in LRU list waiting for reclaim. So a simple loop like: while true; do dd if=/dev/zero of=file bs=1M count=100 rm file done will keep using more and more memory until we hit low watermarks and start pagecache reclaim which will eventually reclaim also the truncate pages. Keeping these truncated (and thus never usable) pages in memory is just a waste of memory, is unnecessarily stressing page cache reclaim, and reportedly also leads to anonymous mmap(2) returning ENOMEM prematurely. So instead of just skipping dirty pages in xfs_vm_releasepage(), return to old behavior of skipping them only if they have delalloc or unwritten buffers and fix the spurious warnings by warning only if the page is clean. CC: Brian Foster CC: Vlastimil Babka Reported-by: Petr Tůma Fixes: 99579ccec4e271c3d4d4e7c946058766812afdab Signed-off-by: Jan Kara Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_aops.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 2693ba84ec25..06763f5cc701 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1158,19 +1158,22 @@ xfs_vm_releasepage( * block_invalidatepage() can send pages that are still marked dirty * but otherwise have invalidated buffers. * - * We've historically freed buffers on the latter. Instead, quietly - * filter out all dirty pages to avoid spurious buffer state warnings. - * This can likely be removed once shrink_active_list() is fixed. + * We want to release the latter to avoid unnecessary buildup of the + * LRU, skip the former and warn if we've left any lingering + * delalloc/unwritten buffers on clean pages. Skip pages with delalloc + * or unwritten buffers and warn if the page is not dirty. Otherwise + * try to release the buffers. */ - if (PageDirty(page)) - return 0; - xfs_count_page_state(page, &delalloc, &unwritten); - if (WARN_ON_ONCE(delalloc)) + if (delalloc) { + WARN_ON_ONCE(!PageDirty(page)); return 0; - if (WARN_ON_ONCE(unwritten)) + } + if (unwritten) { + WARN_ON_ONCE(!PageDirty(page)); return 0; + } return try_to_free_buffers(page); } -- GitLab From 259495a0440f6b8025277171d7becb8b92cece82 Mon Sep 17 00:00:00 2001 From: Artur Molchanov Date: Fri, 30 Dec 2016 19:46:36 +0300 Subject: [PATCH 0512/1442] bridge: netfilter: Fix dropping packets that moving through bridge interface commit 14221cc45caad2fcab3a8543234bb7eda9b540d5 upstream. Problem: br_nf_pre_routing_finish() calls itself instead of br_nf_pre_routing_finish_bridge(). Due to this bug reverse path filter drops packets that go through bridge interface. User impact: Local docker containers with bridge network can not communicate with each other. Fixes: c5136b15ea36 ("netfilter: bridge: add and use br_nf_hook_thresh") Signed-off-by: Artur Molchanov Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netfilter_hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2fe9345c1407..7fbdbae58e65 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -399,7 +399,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish); + br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); -- GitLab From e2d9ad2c540bf15958812189700a812110b452bf Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 8 Nov 2016 16:30:54 +0100 Subject: [PATCH 0513/1442] x86/cpu/AMD: Clean up cpu_llc_id assignment per topology feature commit b6a50cddbcbda7105355898ead18f1a647c22520 upstream. These changes do not affect current hw - just a cleanup: Currently, we assume that a system has a single Last Level Cache (LLC) per node, and that the cpu_llc_id is thus equal to the node_id. This no longer applies since Fam17h can have multiple last level caches within a node. So group the cpu_llc_id assignment by topology feature and family in order to make the computation of cpu_llc_id on the different families more clear. Here is how the LLC ID is being computed on the different families: The NODEID_MSR feature only applies to Fam10h in which case the LLC is at the node level. The TOPOEXT feature is used on families 15h, 16h and 17h. So far we only see multiple last level caches if L3 caches are available. Otherwise, the cpu_llc_id will default to be the phys_proc_id. We have L3 caches only on families 15h and 17h: - on Fam15h, the LLC is at the node level. - on Fam17h, the LLC is at the core complex level and can be found by right shifting the APIC ID. Also, keep the family checks explicit so that new families will fall back to the default, which will be node_id for TOPOEXT systems. Single node systems in families 10h and 15h will have a Node ID of 0 which will be the same as the phys_proc_id, so we don't need to check for multiple nodes before using the node_id. Tested-by: Borislav Petkov Signed-off-by: Yazen Ghannam [ Rewrote the commit message. ] Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: Aravind Gopalakrishnan Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108153054.bs3sajbyevq6a6uu@pd.tnic Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1e81a37c034e..4daad1e39352 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -314,11 +314,30 @@ static void amd_get_topology(struct cpuinfo_x86 *c) smp_num_siblings = ((ebx >> 8) & 3) + 1; c->x86_max_cores /= smp_num_siblings; c->cpu_core_id = ebx & 0xff; + + /* + * We may have multiple LLCs if L3 caches exist, so check if we + * have an L3 cache by looking at the L3 cache CPUID leaf. + */ + if (cpuid_edx(0x80000006)) { + if (c->x86 == 0x17) { + /* + * LLC is at the core complex level. + * Core complex id is ApicId[3]. + */ + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; + } else { + /* LLC is at the node level. */ + per_cpu(cpu_llc_id, cpu) = node_id; + } + } } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); node_id = value & 7; + + per_cpu(cpu_llc_id, cpu) = node_id; } else return; @@ -329,9 +348,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_AMD_DCM); cus_per_node = c->x86_max_cores / nodes_per_socket; - /* store NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = node_id; - /* core id has to be in the [0 .. cores_per_node - 1] range */ c->cpu_core_id %= cus_per_node; } @@ -356,15 +372,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) /* use socket ID also for last level cache */ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; amd_get_topology(c); - - /* - * Fix percpu cpu_llc_id here as LLC topology is different - * for Fam17h systems. - */ - if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) - return; - - per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; #endif } -- GitLab From bd7e769457f964b0bb857e599fb0a914e0c05a3a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 9 Dec 2016 19:29:09 +0100 Subject: [PATCH 0514/1442] x86/bugs: Separate AMD E400 erratum and C1E bug commit 3344ed30791af66dbbad5f375008f3d1863b6c99 upstream. The workaround for the AMD Erratum E400 (Local APIC timer stops in C1E state) is a two step process: - Selection of the E400 aware idle routine - Detection whether the platform is affected The idle routine selection happens for possibly affected CPUs depending on family/model/stepping information. These range of CPUs is not necessarily affected as the decision whether to enable the C1E feature is made by the firmware. Unfortunately there is no way to query this at early boot. The current implementation polls a MSR in the E400 aware idle routine to detect whether the CPU is affected. This is inefficient on non affected CPUs because every idle entry has to do the MSR read. There is a better way to detect this before going idle for the first time which requires to seperate the bug flags: X86_BUG_AMD_E400 - Selects the E400 aware idle routine and enables the detection X86_BUG_AMD_APIC_C1E - Set when the platform is affected by E400 Replace the current X86_BUG_AMD_APIC_C1E usage by the new X86_BUG_AMD_E400 bug bit to select the idle routine which currently does an unconditional detection poll. X86_BUG_AMD_APIC_C1E is going to be used in later patches to remove the MSR polling and simplify the handling of this misfeature. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20161209182912.2726-3-bp@alien8.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/amd.c | 20 +++++++++++++------- arch/x86/kernel/process.c | 3 +-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a39629206864..ed10b5bf9b93 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -311,4 +311,6 @@ #define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ + #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4daad1e39352..71cae73a5076 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -20,6 +20,10 @@ #include "cpu.h" +static const int amd_erratum_383[]; +static const int amd_erratum_400[]; +static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); + /* * nodes_per_socket: Stores the number of nodes per socket. * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX @@ -592,11 +596,16 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); -} -static const int amd_erratum_383[]; -static const int amd_erratum_400[]; -static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); + /* + * Check whether the machine is affected by erratum 400. This is + * used to select the proper idle routine and to enable the check + * whether the machine is affected in arch_post_acpi_init(), which + * sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. + */ + if (cpu_has_amd_erratum(c, amd_erratum_400)) + set_cpu_bug(c, X86_BUG_AMD_E400); +} static void init_amd_k8(struct cpuinfo_x86 *c) { @@ -777,9 +786,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); - if (cpu_has_amd_erratum(c, amd_erratum_400)) - set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); - rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); /* 3DNow or LM implies PREFETCHW */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0888a879120f..8e10e72bf6ee 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -448,8 +448,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c) if (x86_idle || boot_option_idle_override == IDLE_POLL) return; - if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) { - /* E400: APIC timer interrupt does not wake up CPU from C1e */ + if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { pr_info("using AMD E400 aware idle routine\n"); x86_idle = amd_e400_idle; } else if (prefer_mwait_c1_over_halt(c)) { -- GitLab From 99ff99b830c1d70d0c6ec50c64588d9bd8bd5d05 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 5 Jan 2017 10:26:38 +0100 Subject: [PATCH 0515/1442] x86/CPU/AMD: Fix Bulldozer topology commit a33d331761bc5dd330499ca5ceceb67f0640a8e6 upstream. The following commit: 8196dab4fc15 ("x86/cpu: Get rid of compute_unit_id") ... broke the initial strategy for Bulldozer-based cores' topology, where we consider each thread of a compute unit a standalone core and not a HT or SMT thread. Revert to the firmware-supplied core_id numbering and do not make them thread siblings as we don't consider them for such even if they technically are, more or less. Reported-and-tested-by: Brice Goglin Tested-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 8196dab4fc15 ("x86/cpu: Get rid of compute_unit_id") Link: http://lkml.kernel.org/r/20170105092638.5247-1-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 71cae73a5076..1d3167269a67 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -309,15 +309,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 eax, ebx, ecx, edx; - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 7; - - /* get compute unit information */ - smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->x86_max_cores /= smp_num_siblings; - c->cpu_core_id = ebx & 0xff; + node_id = cpuid_ecx(0x8000001e) & 7; /* * We may have multiple LLCs if L3 caches exist, so check if we -- GitLab From 88d3670a1de4246cd40dfacdc21289b313caf5ed Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Dec 2016 18:50:13 -0800 Subject: [PATCH 0516/1442] wusbcore: Fix one more crypto-on-the-stack bug commit 620f1a632ebcc9811c2f8009ba52297c7006f805 upstream. The driver put a constant buffer of all zeros on the stack and pointed a scatterlist entry at it. This doesn't work with virtual stacks. Use ZERO_PAGE instead. Reported-by: Eric Biggers Signed-off-by: Andy Lutomirski Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/crypto.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index 79451f7ef1b7..062c205f0046 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -216,7 +216,6 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, struct scatterlist sg[4], sg_dst; void *dst_buf; size_t dst_size; - const u8 bzero[16] = { 0 }; u8 iv[crypto_skcipher_ivsize(tfm_cbc)]; size_t zero_padding; @@ -261,7 +260,7 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, sg_set_buf(&sg[1], &scratch->b1, sizeof(scratch->b1)); sg_set_buf(&sg[2], b, blen); /* 0 if well behaved :) */ - sg_set_buf(&sg[3], bzero, zero_padding); + sg_set_page(&sg[3], ZERO_PAGE(0), zero_padding, 0); sg_init_one(&sg_dst, dst_buf, dst_size); skcipher_request_set_tfm(req, tfm_cbc); -- GitLab From dfd48efcde844cd3710e05caf734484a974f32ea Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 10 Jan 2017 10:46:00 -0600 Subject: [PATCH 0517/1442] usb: musb: fix runtime PM in debugfs commit 7b6c1b4c0e1e44544aa18161dba6a741c080a7ef upstream. MUSB driver now has runtime PM support, but the debugfs driver misses the PM _get/_put() calls, which could cause MUSB register access failure. Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_debugfs.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c index 9b22d946c089..534a3f6fa89c 100644 --- a/drivers/usb/musb/musb_debugfs.c +++ b/drivers/usb/musb/musb_debugfs.c @@ -114,6 +114,7 @@ static int musb_regdump_show(struct seq_file *s, void *unused) unsigned i; seq_printf(s, "MUSB (M)HDRC Register Dump\n"); + pm_runtime_get_sync(musb->controller); for (i = 0; i < ARRAY_SIZE(musb_regmap); i++) { switch (musb_regmap[i].size) { @@ -132,6 +133,8 @@ static int musb_regdump_show(struct seq_file *s, void *unused) } } + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return 0; } @@ -145,7 +148,10 @@ static int musb_test_mode_show(struct seq_file *s, void *unused) struct musb *musb = s->private; unsigned test; + pm_runtime_get_sync(musb->controller); test = musb_readb(musb->mregs, MUSB_TESTMODE); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); if (test & MUSB_TEST_FORCE_HOST) seq_printf(s, "force host\n"); @@ -194,11 +200,12 @@ static ssize_t musb_test_mode_write(struct file *file, u8 test; char buf[18]; + pm_runtime_get_sync(musb->controller); test = musb_readb(musb->mregs, MUSB_TESTMODE); if (test) { dev_err(musb->controller, "Error: test mode is already set. " "Please do USB Bus Reset to start a new test.\n"); - return count; + goto ret; } memset(buf, 0x00, sizeof(buf)); @@ -234,6 +241,9 @@ static ssize_t musb_test_mode_write(struct file *file, musb_writeb(musb->mregs, MUSB_TESTMODE, test); +ret: + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return count; } @@ -254,8 +264,13 @@ static int musb_softconnect_show(struct seq_file *s, void *unused) switch (musb->xceiv->otg->state) { case OTG_STATE_A_HOST: case OTG_STATE_A_WAIT_BCON: + pm_runtime_get_sync(musb->controller); + reg = musb_readb(musb->mregs, MUSB_DEVCTL); connect = reg & MUSB_DEVCTL_SESSION ? 1 : 0; + + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); break; default: connect = -1; @@ -284,6 +299,7 @@ static ssize_t musb_softconnect_write(struct file *file, if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT; + pm_runtime_get_sync(musb->controller); if (!strncmp(buf, "0", 1)) { switch (musb->xceiv->otg->state) { case OTG_STATE_A_HOST: @@ -314,6 +330,8 @@ static ssize_t musb_softconnect_write(struct file *file, } } + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); return count; } -- GitLab From 58ede4beda662c4e1681fee4fae2174028a1a841 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 10 Jan 2017 12:05:37 +0100 Subject: [PATCH 0518/1442] USB: serial: kl5kusb105: fix line-state error handling commit 146cc8a17a3b4996f6805ee5c080e7101277c410 upstream. The current implementation failed to detect short transfers when attempting to read the line state, and also, to make things worse, logged the content of the uninitialised heap transfer buffer. Fixes: abf492e7b3ae ("USB: kl5kusb105: fix DMA buffers on stack") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/kl5kusb105.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 0ee190fc1bf8..6cb45757818f 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -192,10 +192,11 @@ static int klsi_105_get_line_state(struct usb_serial_port *port, status_buf, KLSI_STATUSBUF_LEN, 10000 ); - if (rc < 0) - dev_err(&port->dev, "Reading line status failed (error = %d)\n", - rc); - else { + if (rc != KLSI_STATUSBUF_LEN) { + dev_err(&port->dev, "reading line status failed: %d\n", rc); + if (rc >= 0) + rc = -EIO; + } else { status = get_unaligned_le16(status_buf); dev_info(&port->serial->dev->dev, "read status %x %x\n", -- GitLab From 4aeab97a051560835668fc5c61eb56b20bd485d0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:10 +0100 Subject: [PATCH 0519/1442] USB: serial: ch341: fix initial modem-control state commit 4e2da44691cffbfffb1535f478d19bc2dca3e62b upstream. DTR and RTS will be asserted by the tty-layer when the port is opened and deasserted on close (if HUPCL is set). Make sure the initial state is not-asserted before the port is first opened as well. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index f139488d0816..6c04c45bcd06 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -253,7 +253,6 @@ static int ch341_port_probe(struct usb_serial_port *port) spin_lock_init(&priv->lock); priv->baud_rate = DEFAULT_BAUD_RATE; - priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; r = ch341_configure(port->serial->dev, priv); if (r < 0) -- GitLab From 1685daad0b0cb117d6ec35fb93d848445da6b201 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:14 +0100 Subject: [PATCH 0520/1442] USB: serial: ch341: fix resume after reset commit ce5e292828117d1b71cbd3edf9e9137cf31acd30 upstream. Fix reset-resume handling which failed to resubmit the read and interrupt URBs, thereby leaving a port that was open before suspend in a broken state until closed and reopened. Fixes: 1ded7ea47b88 ("USB: ch341 serial: fix port number changed after resume") Fixes: 2bfd1c96a9fb ("USB: serial: ch341: remove reset_resume callback") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 6c04c45bcd06..a7a16c6ced62 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -538,14 +538,23 @@ static int ch341_tiocmget(struct tty_struct *tty) static int ch341_reset_resume(struct usb_serial *serial) { - struct ch341_private *priv; - - priv = usb_get_serial_port_data(serial->port[0]); + struct usb_serial_port *port = serial->port[0]; + struct ch341_private *priv = usb_get_serial_port_data(port); + int ret; /* reconfigure ch341 serial port after bus-reset */ ch341_configure(serial->dev, priv); - return 0; + if (tty_port_initialized(&port->port)) { + ret = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); + if (ret) { + dev_err(&port->dev, "failed to submit interrupt urb: %d\n", + ret); + return ret; + } + } + + return usb_serial_generic_resume(serial); } static struct usb_serial_driver ch341_device = { -- GitLab From 139556a98511929d0354389e5017d599dedf9498 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:13 +0100 Subject: [PATCH 0521/1442] USB: serial: ch341: fix open error handling commit f2950b78547ffb8475297ada6b92bc2d774d5461 upstream. Make sure to stop the interrupt URB before returning on errors during open. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index a7a16c6ced62..0a2420063098 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -314,7 +314,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) r = ch341_configure(serial->dev, priv); if (r) - goto out; + return r; if (tty) ch341_set_termios(tty, port, NULL); @@ -324,12 +324,19 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) if (r) { dev_err(&port->dev, "%s - failed to submit interrupt urb: %d\n", __func__, r); - goto out; + return r; } r = usb_serial_generic_open(tty, port); + if (r) + goto err_kill_interrupt_urb; + + return 0; + +err_kill_interrupt_urb: + usb_kill_urb(port->interrupt_in_urb); -out: return r; + return r; } /* Old_termios contains the original termios settings and -- GitLab From 3ed1f6da3a179cde1ccb3d72fe8a0f1d169f9800 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:18 +0100 Subject: [PATCH 0522/1442] USB: serial: ch341: fix control-message error handling commit 2d5a9c72d0c4ac73cf97f4b7814ed6c44b1e49ae upstream. A short control transfer would currently fail to be detected, something which could lead to stale buffer data being used as valid input. Check for short transfers, and make sure to log any transfer errors. Note that this also avoids leaking heap data to user space (TIOCMGET) and the remote device (break control). Fixes: 6ce76104781a ("USB: Driver for CH341 USB-serial adaptor") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 0a2420063098..517671522fe8 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -99,6 +99,8 @@ static int ch341_control_out(struct usb_device *dev, u8 request, r = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, value, index, NULL, 0, DEFAULT_TIMEOUT); + if (r < 0) + dev_err(&dev->dev, "failed to send control message: %d\n", r); return r; } @@ -116,7 +118,20 @@ static int ch341_control_in(struct usb_device *dev, r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, buf, bufsize, DEFAULT_TIMEOUT); - return r; + if (r < bufsize) { + if (r >= 0) { + dev_err(&dev->dev, + "short control message received (%d < %u)\n", + r, bufsize); + r = -EIO; + } + + dev_err(&dev->dev, "failed to receive control message: %d\n", + r); + return r; + } + + return 0; } static int ch341_set_baudrate(struct usb_device *dev, @@ -158,9 +173,9 @@ static int ch341_set_handshake(struct usb_device *dev, u8 control) static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) { + const unsigned int size = 2; char *buffer; int r; - const unsigned size = 8; unsigned long flags; buffer = kmalloc(size, GFP_KERNEL); @@ -171,14 +186,9 @@ static int ch341_get_status(struct usb_device *dev, struct ch341_private *priv) if (r < 0) goto out; - /* setup the private status if available */ - if (r == 2) { - r = 0; - spin_lock_irqsave(&priv->lock, flags); - priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT; - spin_unlock_irqrestore(&priv->lock, flags); - } else - r = -EPROTO; + spin_lock_irqsave(&priv->lock, flags); + priv->line_status = (~(*buffer)) & CH341_BITS_MODEM_STAT; + spin_unlock_irqrestore(&priv->lock, flags); out: kfree(buffer); return r; @@ -188,9 +198,9 @@ out: kfree(buffer); static int ch341_configure(struct usb_device *dev, struct ch341_private *priv) { + const unsigned int size = 2; char *buffer; int r; - const unsigned size = 8; buffer = kmalloc(size, GFP_KERNEL); if (!buffer) -- GitLab From 0556a65e8b7d562ad99f10f36a59f7c87af0ca55 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:11 +0100 Subject: [PATCH 0523/1442] USB: serial: ch341: fix open and resume after B0 commit a20047f36e2f6a1eea4f1fd261aaa55882369868 upstream. The private baud_rate variable is used to configure the port at open and reset-resume and must never be set to (and left at) zero or reset-resume and all further open attempts will fail. Fixes: aa91def41a7b ("USB: ch341: set tty baud speed according to tty struct") Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 517671522fe8..22b2c464b468 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -361,12 +361,11 @@ static void ch341_set_termios(struct tty_struct *tty, baud_rate = tty_get_baud_rate(tty); - priv->baud_rate = baud_rate; - if (baud_rate) { spin_lock_irqsave(&priv->lock, flags); priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); spin_unlock_irqrestore(&priv->lock, flags); + priv->baud_rate = baud_rate; ch341_set_baudrate(port->serial->dev, priv); } else { spin_lock_irqsave(&priv->lock, flags); -- GitLab From 61a8c3372adbfc64ee9bcae400908656602c3a92 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 5 Jan 2017 14:14:54 -0800 Subject: [PATCH 0524/1442] Input: elants_i2c - avoid divide by 0 errors on bad touchscreen data commit 1c3415a06b1016a596bfe59e0cfee56c773aa958 upstream. The following crash may be seen if bad data is received from the touchscreen. [ 2189.425150] elants_i2c i2c-ELAN0001:00: unknown packet ff ff ff ff [ 2189.430738] divide error: 0000 [#1] PREEMPT SMP [ 2189.434679] gsmi: Log Shutdown Reason 0x03 [ 2189.434689] Modules linked in: ip6t_REJECT nf_reject_ipv6 rfcomm evdi uinput uvcvideo cmac videobuf2_vmalloc videobuf2_memops snd_hda_codec_hdmi i2c_dev videobuf2_core snd_soc_sst_cht_bsw_rt5645 snd_hda_intel snd_intel_sst_acpi btusb btrtl btbcm btintel bluetooth snd_soc_sst_acpi snd_hda_codec snd_intel_sst_core snd_hwdep snd_soc_sst_mfld_platform snd_hda_core snd_soc_rt5645 memconsole_x86_legacy memconsole zram snd_soc_rl6231 fuse ip6table_filter iwlmvm iwlwifi iwl7000_mac80211 cfg80211 iio_trig_sysfs joydev cros_ec_sensors cros_ec_sensors_core industrialio_triggered_buffer kfifo_buf industrialio snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device ppp_async ppp_generic slhc tun [ 2189.434866] CPU: 0 PID: 106 Comm: irq/184-ELAN000 Tainted: G W 3.18.0-13101-g57e8190 #1 [ 2189.434883] Hardware name: GOOGLE Ultima, BIOS Google_Ultima.7287.131.43 07/20/2016 [ 2189.434898] task: ffff88017a0b6d80 ti: ffff88017a2bc000 task.ti: ffff88017a2bc000 [ 2189.434913] RIP: 0010:[] [] elants_i2c_irq+0x190/0x200 [ 2189.434937] RSP: 0018:ffff88017a2bfd98 EFLAGS: 00010293 [ 2189.434948] RAX: 0000000000000000 RBX: ffff88017a967828 RCX: ffff88017a9678e8 [ 2189.434962] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000000 [ 2189.434975] RBP: ffff88017a2bfdd8 R08: 00000000000003e8 R09: 0000000000000000 [ 2189.434989] R10: 0000000000000000 R11: 000000000044a2bd R12: ffff88017a991800 [ 2189.435001] R13: ffffffffbe8a2a53 R14: ffff88017a0b6d80 R15: ffff88017a0b6d80 [ 2189.435011] FS: 0000000000000000(0000) GS:ffff88017fc00000(0000) knlGS:0000000000000000 [ 2189.435022] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 2189.435030] CR2: 00007f678d94b000 CR3: 000000003f41a000 CR4: 00000000001007f0 [ 2189.435039] Stack: [ 2189.435044] ffff88017a2bfda8 ffff88017a9678e8 646464647a2bfdd8 0000000006e09574 [ 2189.435060] 0000000000000000 ffff88017a088b80 ffff88017a921000 ffffffffbe8a2a53 [ 2189.435074] ffff88017a2bfe08 ffffffffbe8a2a73 ffff88017a0b6d80 0000000006e09574 [ 2189.435089] Call Trace: [ 2189.435101] [] ? irq_thread_dtor+0xa9/0xa9 [ 2189.435112] [] irq_thread_fn+0x20/0x40 [ 2189.435123] [] irq_thread+0x14e/0x222 [ 2189.435135] [] ? __schedule+0x3b3/0x57a [ 2189.435145] [] ? wake_threads_waitq+0x2d/0x2d [ 2189.435156] [] ? irq_thread_fn+0x40/0x40 [ 2189.435168] [] kthread+0x10e/0x116 [ 2189.435178] [] ? __kthread_parkme+0x67/0x67 [ 2189.435189] [] ret_from_fork+0x7c/0xb0 [ 2189.435199] [] ? __kthread_parkme+0x67/0x67 [ 2189.435208] Code: ff ff eb 73 0f b6 bb c1 00 00 00 83 ff 03 7e 13 49 8d 7c 24 20 ba 04 00 00 00 48 c7 c6 8a cd 21 bf eb 4d 0f b6 83 c2 00 00 00 99 ff 83 f8 37 75 15 48 6b f7 37 4c 8d a3 c4 00 00 00 4c 8d ac [ 2189.435312] RIP [] elants_i2c_irq+0x190/0x200 [ 2189.435323] RSP [ 2189.435350] ---[ end trace f4945345a75d96dd ]--- [ 2189.443841] Kernel panic - not syncing: Fatal exception [ 2189.444307] Kernel Offset: 0x3d800000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 2189.444519] gsmi: Log Shutdown Reason 0x02 The problem was seen with a 3.18 based kernel, but there is no reason to believe that the upstream code is safe. Fixes: 66aee90088da2 ("Input: add support for Elan eKTH I2C touchscreens") Signed-off-by: Guenter Roeck Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/elants_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 02aec284deca..3e6003d32e56 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -914,9 +914,9 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev) case QUEUE_HEADER_NORMAL: report_count = ts->buf[FW_HDR_COUNT]; - if (report_count > 3) { + if (report_count == 0 || report_count > 3) { dev_err(&client->dev, - "too large report count: %*ph\n", + "bad report count: %*ph\n", HEADER_SIZE, ts->buf); break; } -- GitLab From 93c94ec23faeea8d9a501b6188c701fc282672b1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 6 Jan 2017 19:02:57 +0800 Subject: [PATCH 0525/1442] i2c: print correct device invalid address commit 6f724fb3039522486fce2e32e4c0fbe238a6ab02 upstream. In of_i2c_register_device(), when the check for device address validity fails we print the info.addr, which has not been assigned properly. Fix this by printing the actual invalid address. Signed-off-by: John Garry Reviewed-by: Vladimir Zapolskiy Signed-off-by: Wolfram Sang Fixes: b4e2f6ac1281 ("i2c: apply DT flags when probing") Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index b432b64e307a..7484aac1e14d 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1657,7 +1657,7 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap, if (i2c_check_addr_validity(addr, info.flags)) { dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n", - info.addr, node->full_name); + addr, node->full_name); return ERR_PTR(-EINVAL); } -- GitLab From ab8957396a692d46a357aec8ff57abc9bd5a878a Mon Sep 17 00:00:00 2001 From: Vlad Tsyrklevich Date: Mon, 9 Jan 2017 22:53:36 +0700 Subject: [PATCH 0526/1442] i2c: fix kernel memory disclosure in dev interface commit 30f939feaeee23e21391cfc7b484f012eb189c3c upstream. i2c_smbus_xfer() does not always fill an entire block, allowing kernel stack memory disclosure through the temp variable. Clear it before it's read to. Signed-off-by: Vlad Tsyrklevich Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 66f323fd3982..6f638bbc922d 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -331,7 +331,7 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, unsigned long arg) { struct i2c_smbus_ioctl_data data_arg; - union i2c_smbus_data temp; + union i2c_smbus_data temp = {}; int datasize, res; if (copy_from_user(&data_arg, -- GitLab From d06367ac1730ded79aa78307126236bf83af95a3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Jan 2017 19:33:08 -0500 Subject: [PATCH 0527/1442] fix a fencepost error in pipe_advance() commit b9dc6f65bc5e232d1c05fe34b5daadc7e8bbf1fb upstream. The logics in pipe_advance() used to release all buffers past the new position failed in cases when the number of buffers to release was equal to pipe->buffers. If that happened, none of them had been released, leaving pipe full. Worse, it was trivial to trigger and we end up with pipe full of uninitialized pages. IOW, it's an infoleak. Reported-by: "Alan J. Wylie" Tested-by: "Alan J. Wylie" Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- lib/iov_iter.c | 54 +++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f2bd21b93dfc..efb0b4d267a1 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -678,43 +678,50 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, } EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); +static inline void pipe_truncate(struct iov_iter *i) +{ + struct pipe_inode_info *pipe = i->pipe; + if (pipe->nrbufs) { + size_t off = i->iov_offset; + int idx = i->idx; + int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1); + if (off) { + pipe->bufs[idx].len = off - pipe->bufs[idx].offset; + idx = next_idx(idx, pipe); + nrbufs++; + } + while (pipe->nrbufs > nrbufs) { + pipe_buf_release(pipe, &pipe->bufs[idx]); + idx = next_idx(idx, pipe); + pipe->nrbufs--; + } + } +} + static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - struct pipe_buffer *buf; - int idx = i->idx; - size_t off = i->iov_offset, orig_sz; - if (unlikely(i->count < size)) size = i->count; - orig_sz = size; - if (size) { + struct pipe_buffer *buf; + size_t off = i->iov_offset, left = size; + int idx = i->idx; if (off) /* make it relative to the beginning of buffer */ - size += off - pipe->bufs[idx].offset; + left += off - pipe->bufs[idx].offset; while (1) { buf = &pipe->bufs[idx]; - if (size <= buf->len) + if (left <= buf->len) break; - size -= buf->len; + left -= buf->len; idx = next_idx(idx, pipe); } - buf->len = size; i->idx = idx; - off = i->iov_offset = buf->offset + size; - } - if (off) - idx = next_idx(idx, pipe); - if (pipe->nrbufs) { - int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); - /* [curbuf,unused) is in use. Free [idx,unused) */ - while (idx != unused) { - pipe_buf_release(pipe, &pipe->bufs[idx]); - idx = next_idx(idx, pipe); - pipe->nrbufs--; - } + i->iov_offset = buf->offset + left; } - i->count -= orig_sz; + i->count -= size; + /* ... and discard everything past that point */ + pipe_truncate(i); } void iov_iter_advance(struct iov_iter *i, size_t size) @@ -774,6 +781,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction, size_t count) { BUG_ON(direction != ITER_PIPE); + WARN_ON(pipe->nrbufs == pipe->buffers); i->type = direction; i->pipe = pipe; i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); -- GitLab From 4d0f302bf56a03b8023f06a27b811c1a4625c20d Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 11 Jan 2017 17:10:34 +0200 Subject: [PATCH 0528/1442] xhci: fix deadlock at host remove by running watchdog correctly commit d6169d04097fd9ddf811e63eae4e5cd71e6666e2 upstream. If a URB is killed while the host is removed we can end up in a situation where the hub thread takes the roothub device lock, and waits for the URB to be given back by xhci-hcd, blocking the host remove code. xhci-hcd tries to stop the endpoint and give back the urb, but can't as the host is removed from PCI bus at the same time, preventing the normal way of giving back urb. Instead we need to rely on the stop command timeout function to give back the urb. This xhci_stop_endpoint_command_watchdog() timeout function used a XHCI_STATE_DYING flag to indicate if the timeout function is already running, but later this flag has been taking into use in other places to mark that xhci is dying. Remove checks for XHCI_STATE_DYING in xhci_urb_dequeue. We are still checking that reading from pci state does not return 0xffffffff or that host is not halted before trying to stop the endpoint. This whole area of stopping endpoints, giving back URBs, and the wathdog timeout need rework, this fix focuses on solving a specific deadlock issue that we can then send to stable before any major rework. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 11 ----------- drivers/usb/host/xhci.c | 13 ------------- 2 files changed, 24 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 771a6da9caea..521c1816a26a 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -917,17 +917,6 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) spin_lock_irqsave(&xhci->lock, flags); ep->stop_cmds_pending--; - if (xhci->xhc_state & XHCI_STATE_REMOVING) { - spin_unlock_irqrestore(&xhci->lock, flags); - return; - } - if (xhci->xhc_state & XHCI_STATE_DYING) { - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "Stop EP timer ran, but another timer marked " - "xHCI as DYING, exiting."); - spin_unlock_irqrestore(&xhci->lock, flags); - return; - } if (!(ep->stop_cmds_pending == 0 && (ep->ep_state & EP_HALT_PENDING))) { xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Stop EP timer ran, but no command pending, " diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index ad0624386950..34e23c7d7797 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1529,19 +1529,6 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) xhci_urb_free_priv(urb_priv); return ret; } - if ((xhci->xhc_state & XHCI_STATE_DYING) || - (xhci->xhc_state & XHCI_STATE_HALTED)) { - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "Ep 0x%x: URB %p to be canceled on " - "non-responsive xHCI host.", - urb->ep->desc.bEndpointAddress, urb); - /* Let the stop endpoint command watchdog timer (which set this - * state) finish cleaning up the endpoint TD lists. We must - * have caught it in the middle of dropping a lock and giving - * back an URB. - */ - goto done; - } ep_index = xhci_get_endpoint_index(&urb->ep->desc); ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index]; -- GitLab From 28dad9aa9b367b81f99a36a06d16e89e649133ce Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 6 Jan 2017 14:12:51 +0100 Subject: [PATCH 0529/1442] btrfs: fix crash when tracepoint arguments are freed by wq callbacks commit ac0c7cf8be00f269f82964cf7b144ca3edc5dbc4 upstream. Enabling btrfs tracepoints leads to instant crash, as reported. The wq callbacks could free the memory and the tracepoints started to dereference the members to get to fs_info. The proposed fix https://marc.info/?l=linux-btrfs&m=148172436722606&w=2 removed the tracepoints but we could preserve them by passing only the required data in a safe way. Fixes: bc074524e123 ("btrfs: prefix fsid to all trace events") Reported-by: Sebastian Andrzej Siewior Reviewed-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/async-thread.c | 15 +++++++++++---- include/trace/events/btrfs.h | 22 +++++++++++++--------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 63d197724519..ff0b0be92d61 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -273,6 +273,8 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) unsigned long flags; while (1) { + void *wtag; + spin_lock_irqsave(lock, flags); if (list_empty(list)) break; @@ -299,11 +301,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) spin_unlock_irqrestore(lock, flags); /* - * we don't want to call the ordered free functions - * with the lock held though + * We don't want to call the ordered free functions with the + * lock held though. Save the work as tag for the trace event, + * because the callback could free the structure. */ + wtag = work; work->ordered_free(work); - trace_btrfs_all_work_done(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); } spin_unlock_irqrestore(lock, flags); } @@ -311,6 +315,7 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) static void normal_work_helper(struct btrfs_work *work) { struct __btrfs_workqueue *wq; + void *wtag; int need_order = 0; /* @@ -324,6 +329,8 @@ static void normal_work_helper(struct btrfs_work *work) if (work->ordered_func) need_order = 1; wq = work->wq; + /* Safe for tracepoints in case work gets freed by the callback */ + wtag = work; trace_btrfs_work_sched(work); thresh_exec_hook(wq); @@ -333,7 +340,7 @@ static void normal_work_helper(struct btrfs_work *work) run_ordered_work(wq); } if (!need_order) - trace_btrfs_all_work_done(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); } void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e030d6f6c19a..6d7fe1169956 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1162,22 +1162,26 @@ DECLARE_EVENT_CLASS(btrfs__work, __entry->func, __entry->ordered_func, __entry->ordered_free) ); -/* For situiations that the work is freed */ +/* + * For situiations when the work is freed, we pass fs_info and a tag that that + * matches address of the work structure so it can be paired with the + * scheduling event. + */ DECLARE_EVENT_CLASS(btrfs__work__done, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), - TP_ARGS(work), + TP_ARGS(fs_info, wtag), TP_STRUCT__entry_btrfs( - __field( void *, work ) + __field( void *, wtag ) ), - TP_fast_assign_btrfs(btrfs_work_owner(work), - __entry->work = work; + TP_fast_assign_btrfs(fs_info, + __entry->wtag = wtag; ), - TP_printk_btrfs("work->%p", __entry->work) + TP_printk_btrfs("work->%p", __entry->wtag) ); DEFINE_EVENT(btrfs__work, btrfs_work_queued, @@ -1196,9 +1200,9 @@ DEFINE_EVENT(btrfs__work, btrfs_work_sched, DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), - TP_ARGS(work) + TP_ARGS(fs_info, wtag) ); DEFINE_EVENT(btrfs__work, btrfs_ordered_sched, -- GitLab From f9cf776b0555a55a6f64871bf9433f0555e846b6 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 16 Dec 2016 18:26:54 +0900 Subject: [PATCH 0530/1442] ASoC: hdmi-codec: use unsigned type to structure members with bit-field commit 9e4d59ada4d602e78eee9fb5f898ce61fdddb446 upstream. This is a fix for Linux 4.10-rc1. In C language specification, a bit-field is interpreted as a signed or unsigned integer type consisting of the specified number of bits. In GCC manual, the range of a signed bit field of N bits is from -(2^N) / 2 to ((2^N) / 2) - 1 https://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html#Bit-Fields Therefore, when defined as 1 bit-field with signed type, variables can represents -1 and 0. The snd-soc-hdmi-codec module includes a structure which has signed type members with bit-fields. Codes of this module assign 0 and 1 to the members. This seems to result in implementation-dependent behaviours. As of v4.10-rc1 merge window, outside of sound subsystem, this structure is referred by below GPU modules. - tda998x - sti-drm - mediatek-drm-hdmi - msm As long as I review their codes relevant to the structure, the structure members are used just for condition statements and printk formats. My proposal of change is a bit intrusive to the printk formats but this may be acceptable. Totally, it's reasonable to use unsigned type for the structure members. This bug is detected by Sparse, static code analyzer with below warnings. ./include/sound/hdmi-codec.h:39:26: error: dubious one-bit signed bitfield ./include/sound/hdmi-codec.h:40:28: error: dubious one-bit signed bitfield ./include/sound/hdmi-codec.h:41:29: error: dubious one-bit signed bitfield ./include/sound/hdmi-codec.h:42:31: error: dubious one-bit signed bitfield Fixes: 09184118a8ab ("ASoC: hdmi-codec: Add hdmi-codec for external HDMI-encoders") Signed-off-by: Takashi Sakamoto Acked-by: Arnaud Pouliquen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- include/sound/hdmi-codec.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h index 530c57bdefa0..915c4357945c 100644 --- a/include/sound/hdmi-codec.h +++ b/include/sound/hdmi-codec.h @@ -36,10 +36,10 @@ struct hdmi_codec_daifmt { HDMI_AC97, HDMI_SPDIF, } fmt; - int bit_clk_inv:1; - int frame_clk_inv:1; - int bit_clk_master:1; - int frame_clk_master:1; + unsigned int bit_clk_inv:1; + unsigned int frame_clk_inv:1; + unsigned int bit_clk_master:1; + unsigned int frame_clk_master:1; }; /* -- GitLab From 1f363639eb30c2a4ef9ff125e2fbfe213d82a2f9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 11 Dec 2016 10:05:49 +0800 Subject: [PATCH 0531/1442] Revert "tty: serial: 8250: add CON_CONSDEV to flags" commit 6741f551a0b26479de2532ffa43a366747e6dbf3 upstream. This commit needs to be reverted because it prevents people from using the serial console as a secondary console with input being directed to tty0. IOW, if you boot with console=ttyS0 console=tty0 then all kernels prior to this commit will produce output on both ttyS0 and tty0 but input will only be taken from tty0. With this patch the serial console will always be the primary console instead of tty0, potentially preventing people from getting into their machines in emergency situations. Fixes: d03516df8375 ("tty: serial: 8250: add CON_CONSDEV to flags") Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 240a361b674f..e8819aa20415 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -675,7 +675,7 @@ static struct console univ8250_console = { .device = uart_console_device, .setup = univ8250_console_setup, .match = univ8250_console_match, - .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_CONSDEV, + .flags = CON_PRINTBUFFER | CON_ANYTIME, .index = -1, .data = &serial8250_reg, }; -- GitLab From 57bfd5a3710bff25e35974de6db8816103d44973 Mon Sep 17 00:00:00 2001 From: Augusto Mecking Caringi Date: Tue, 10 Jan 2017 10:45:00 +0000 Subject: [PATCH 0532/1442] vme: Fix wrong pointer utilization in ca91cx42_slave_get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c8a6a09c1c617402cc9254b2bc8da359a0347d75 upstream. In ca91cx42_slave_get function, the value pointed by vme_base pointer is set through: *vme_base = ioread32(bridge->base + CA91CX42_VSI_BS[i]); So it must be dereferenced to be used in calculation of pci_base: *pci_base = (dma_addr_t)*vme_base + pci_offset; This bug was caught thanks to the following gcc warning: drivers/vme/bridges/vme_ca91cx42.c: In function ‘ca91cx42_slave_get’: drivers/vme/bridges/vme_ca91cx42.c:467:14: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] *pci_base = (dma_addr_t)vme_base + pci_offset; Signed-off-by: Augusto Mecking Caringi Acked-By: Martyn Welch Signed-off-by: Greg Kroah-Hartman --- drivers/vme/bridges/vme_ca91cx42.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c index 6b5ee896af63..7cc51223db1c 100644 --- a/drivers/vme/bridges/vme_ca91cx42.c +++ b/drivers/vme/bridges/vme_ca91cx42.c @@ -464,7 +464,7 @@ static int ca91cx42_slave_get(struct vme_slave_resource *image, int *enabled, vme_bound = ioread32(bridge->base + CA91CX42_VSI_BD[i]); pci_offset = ioread32(bridge->base + CA91CX42_VSI_TO[i]); - *pci_base = (dma_addr_t)vme_base + pci_offset; + *pci_base = (dma_addr_t)*vme_base + pci_offset; *size = (unsigned long long)((vme_bound - *vme_base) + granularity); *enabled = 0; -- GitLab From 52fd0ab07676b295abc54eeb1cef7a06d49c9847 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 4 Jan 2017 19:28:14 -0800 Subject: [PATCH 0533/1442] pid: fix lockdep deadlock warning due to ucount_lock commit add7c65ca426b7a37184dd3d2172394e23d585d6 upstream. ========================================================= [ INFO: possible irq lock inversion dependency detected ] 4.10.0-rc2-00024-g4aecec9-dirty #118 Tainted: G W --------------------------------------------------------- swapper/1/0 just changed the state of lock: (&(&sighand->siglock)->rlock){-.....}, at: [] __lock_task_sighand+0xb6/0x2c0 but this lock took another, HARDIRQ-unsafe lock in the past: (ucounts_lock){+.+...} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Chain exists of: &(&sighand->siglock)->rlock --> &(&tty->ctrl_lock)->rlock --> ucounts_lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(ucounts_lock); local_irq_disable(); lock(&(&sighand->siglock)->rlock); lock(&(&tty->ctrl_lock)->rlock); lock(&(&sighand->siglock)->rlock); *** DEADLOCK *** This patch removes a dependency between rlock and ucount_lock. Fixes: f333c700c610 ("pidns: Add a limit on the number of pid namespaces") Signed-off-by: Andrei Vagin Acked-by: Al Viro Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- kernel/pid_namespace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index df9e8e9e0be7..eef2ce968636 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -151,8 +151,12 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns static void delayed_free_pidns(struct rcu_head *p) { - kmem_cache_free(pid_ns_cachep, - container_of(p, struct pid_namespace, rcu)); + struct pid_namespace *ns = container_of(p, struct pid_namespace, rcu); + + dec_pid_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); + + kmem_cache_free(pid_ns_cachep, ns); } static void destroy_pid_namespace(struct pid_namespace *ns) @@ -162,8 +166,6 @@ static void destroy_pid_namespace(struct pid_namespace *ns) ns_free_inum(&ns->ns); for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); - dec_pid_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); call_rcu(&ns->rcu, delayed_free_pidns); } -- GitLab From 1a62a0f76556f39d6d67789bb981b28230aaa338 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Jan 2017 14:18:43 +1300 Subject: [PATCH 0534/1442] mnt: Protect the mountpoint hashtable with mount_lock commit 3895dbf8985f656675b5bde610723a29cbce3fa7 upstream. Protecting the mountpoint hashtable with namespace_sem was sufficient until a call to umount_mnt was added to mntput_no_expire. At which point it became possible for multiple calls of put_mountpoint on the same hash chain to happen on the same time. Kristen Johansen reported: > This can cause a panic when simultaneous callers of put_mountpoint > attempt to free the same mountpoint. This occurs because some callers > hold the mount_hash_lock, while others hold the namespace lock. Some > even hold both. > > In this submitter's case, the panic manifested itself as a GP fault in > put_mountpoint() when it called hlist_del() and attempted to dereference > a m_hash.pprev that had been poisioned by another thread. Al Viro observed that the simple fix is to switch from using the namespace_sem to the mount_lock to protect the mountpoint hash table. I have taken Al's suggested patch moved put_mountpoint in pivot_root (instead of taking mount_lock an additional time), and have replaced new_mountpoint with get_mountpoint a function that does the hash table lookup and addition under the mount_lock. The introduction of get_mounptoint ensures that only the mount_lock is needed to manipulate the mountpoint hashtable. d_set_mounted is modified to only set DCACHE_MOUNTED if it is not already set. This allows get_mountpoint to use the setting of DCACHE_MOUNTED to ensure adding a struct mountpoint for a dentry happens exactly once. Fixes: ce07d891a089 ("mnt: Honor MNT_LOCKED when detaching mounts") Reported-by: Krister Johansen Suggested-by: Al Viro Acked-by: Al Viro Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 7 ++++-- fs/namespace.c | 64 +++++++++++++++++++++++++++++++++++--------------- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 5c7cc953ac81..4485a48f4091 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1330,8 +1330,11 @@ int d_set_mounted(struct dentry *dentry) } spin_lock(&dentry->d_lock); if (!d_unlinked(dentry)) { - dentry->d_flags |= DCACHE_MOUNTED; - ret = 0; + ret = -EBUSY; + if (!d_mountpoint(dentry)) { + dentry->d_flags |= DCACHE_MOUNTED; + ret = 0; + } } spin_unlock(&dentry->d_lock); out: diff --git a/fs/namespace.c b/fs/namespace.c index e6c234b1a645..7cea503ae06d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -746,26 +746,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry) return NULL; } -static struct mountpoint *new_mountpoint(struct dentry *dentry) +static struct mountpoint *get_mountpoint(struct dentry *dentry) { - struct hlist_head *chain = mp_hash(dentry); - struct mountpoint *mp; + struct mountpoint *mp, *new = NULL; int ret; - mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); - if (!mp) + if (d_mountpoint(dentry)) { +mountpoint: + read_seqlock_excl(&mount_lock); + mp = lookup_mountpoint(dentry); + read_sequnlock_excl(&mount_lock); + if (mp) + goto done; + } + + if (!new) + new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); + if (!new) return ERR_PTR(-ENOMEM); + + /* Exactly one processes may set d_mounted */ ret = d_set_mounted(dentry); - if (ret) { - kfree(mp); - return ERR_PTR(ret); - } - mp->m_dentry = dentry; - mp->m_count = 1; - hlist_add_head(&mp->m_hash, chain); - INIT_HLIST_HEAD(&mp->m_list); + /* Someone else set d_mounted? */ + if (ret == -EBUSY) + goto mountpoint; + + /* The dentry is not available as a mountpoint? */ + mp = ERR_PTR(ret); + if (ret) + goto done; + + /* Add the new mountpoint to the hash table */ + read_seqlock_excl(&mount_lock); + new->m_dentry = dentry; + new->m_count = 1; + hlist_add_head(&new->m_hash, mp_hash(dentry)); + INIT_HLIST_HEAD(&new->m_list); + read_sequnlock_excl(&mount_lock); + + mp = new; + new = NULL; +done: + kfree(new); return mp; } @@ -1568,11 +1592,11 @@ void __detach_mounts(struct dentry *dentry) struct mount *mnt; namespace_lock(); + lock_mount_hash(); mp = lookup_mountpoint(dentry); if (IS_ERR_OR_NULL(mp)) goto out_unlock; - lock_mount_hash(); event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); @@ -1582,9 +1606,9 @@ void __detach_mounts(struct dentry *dentry) } else umount_tree(mnt, UMOUNT_CONNECTED); } - unlock_mount_hash(); put_mountpoint(mp); out_unlock: + unlock_mount_hash(); namespace_unlock(); } @@ -2013,9 +2037,7 @@ static struct mountpoint *lock_mount(struct path *path) namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) { - struct mountpoint *mp = lookup_mountpoint(dentry); - if (!mp) - mp = new_mountpoint(dentry); + struct mountpoint *mp = get_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); inode_unlock(dentry->d_inode); @@ -2034,7 +2056,11 @@ static struct mountpoint *lock_mount(struct path *path) static void unlock_mount(struct mountpoint *where) { struct dentry *dentry = where->m_dentry; + + read_seqlock_excl(&mount_lock); put_mountpoint(where); + read_sequnlock_excl(&mount_lock); + namespace_unlock(); inode_unlock(dentry->d_inode); } @@ -3110,9 +3136,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); /* A moved mount should not expire automatically */ list_del_init(&new_mnt->mnt_expire); + put_mountpoint(root_mp); unlock_mount_hash(); chroot_fs_refs(&root, &new); - put_mountpoint(root_mp); error = 0; out4: unlock_mount(old_mp); -- GitLab From 3fbaff3adc763d999fa803bc1aeb5e49c48ce5c0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Jan 2017 17:15:01 +0000 Subject: [PATCH 0535/1442] drivers: char: mem: Fix thinkos in kmem address checks commit 488debb9971bc7d0edd6d8080ba78ca02a04f6c4 upstream. When borrowing the pfn_valid() check from mmap_kmem(), somebody managed to get physical and virtual addresses spectacularly muddled up, such that we've ended up with checks for one being the other. Whilst this does indeed prevent out-of-bounds accesses crashing, on most systems it also prevents the more desirable use-case of working at all ever. Check the *virtual* offset correctly for what it is. Furthermore, do so in the right place - a read or write may span multiple pages, so a single up-front check is insufficient. High memory accesses already have a similar validity check just before the copy_to_user() call, so just make the low memory path fully consistent with that. Reported-by: Jason A. Donenfeld Fixes: 148a1bc84398 ("drivers: char: mem: Check {read,write}_kmem() addresses") Signed-off-by: Robin Murphy Signed-off-by: Greg Kroah-Hartman --- drivers/char/mem.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 5bb1985ec484..6d9cc2d39d22 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -381,9 +381,6 @@ static ssize_t read_kmem(struct file *file, char __user *buf, char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ int err = 0; - if (!pfn_valid(PFN_DOWN(p))) - return -EIO; - read = 0; if (p < (unsigned long) high_memory) { low_count = count; @@ -412,6 +409,8 @@ static ssize_t read_kmem(struct file *file, char __user *buf, * by the kernel or data corruption may occur */ kbuf = xlate_dev_kmem_ptr((void *)p); + if (!virt_addr_valid(kbuf)) + return -ENXIO; if (copy_to_user(buf, kbuf, sz)) return -EFAULT; @@ -482,6 +481,8 @@ static ssize_t do_write_kmem(unsigned long p, const char __user *buf, * corruption may occur. */ ptr = xlate_dev_kmem_ptr((void *)p); + if (!virt_addr_valid(ptr)) + return -ENXIO; copied = copy_from_user(ptr, buf, sz); if (copied) { @@ -512,9 +513,6 @@ static ssize_t write_kmem(struct file *file, const char __user *buf, char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ int err = 0; - if (!pfn_valid(PFN_DOWN(p))) - return -EIO; - if (p < (unsigned long) high_memory) { unsigned long to_write = min_t(unsigned long, count, (unsigned long)high_memory - p); -- GitLab From 6c6ae8c43df99284b3260082b43b5858b701bd5e Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 2 Jan 2017 12:07:37 +0200 Subject: [PATCH 0536/1442] dmaengine: omap-dma: Fix dynamic lch_map allocation commit 836c3ce2566fb8c1754f8d7c9534cad9bc8a6879 upstream. The original patch did not done what it was supposed to be doing and even worst it broke legacy boot (OMAP1). The lch_map size should be the number of available logical channels in sDMA and the od->dma_requests should store the number of available DMA request lines usable in sDMA. In legacy mode we do not have a way to get the DMA request count, in that case we use OMAP_SDMA_REQUESTS (127), despite the fact that OMAP1510 have only 31 DMA request line. Fixes: 2d1a9a946fae ("dmaengine: omap-dma: Dynamically allocate memory for lch_map") Reported-by: Aaro Koskinen Signed-off-by: Peter Ujfalusi Tested-by: Aaro Koskinen Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/omap-dma.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 7ca27d4b1c54..6b16ce390dce 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -1339,6 +1339,7 @@ static int omap_dma_probe(struct platform_device *pdev) struct omap_dmadev *od; struct resource *res; int rc, i, irq; + u32 lch_count; od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL); if (!od) @@ -1381,20 +1382,31 @@ static int omap_dma_probe(struct platform_device *pdev) spin_lock_init(&od->lock); spin_lock_init(&od->irq_lock); - if (!pdev->dev.of_node) { - od->dma_requests = od->plat->dma_attr->lch_count; - if (unlikely(!od->dma_requests)) - od->dma_requests = OMAP_SDMA_REQUESTS; - } else if (of_property_read_u32(pdev->dev.of_node, "dma-requests", - &od->dma_requests)) { + /* Number of DMA requests */ + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { dev_info(&pdev->dev, "Missing dma-requests property, using %u.\n", OMAP_SDMA_REQUESTS); - od->dma_requests = OMAP_SDMA_REQUESTS; } - od->lch_map = devm_kcalloc(&pdev->dev, od->dma_requests, - sizeof(*od->lch_map), GFP_KERNEL); + /* Number of available logical channels */ + if (!pdev->dev.of_node) { + lch_count = od->plat->dma_attr->lch_count; + if (unlikely(!lch_count)) + lch_count = OMAP_SDMA_CHANNELS; + } else if (of_property_read_u32(pdev->dev.of_node, "dma-channels", + &lch_count)) { + dev_info(&pdev->dev, + "Missing dma-channels property, using %u.\n", + OMAP_SDMA_CHANNELS); + lch_count = OMAP_SDMA_CHANNELS; + } + + od->lch_map = devm_kcalloc(&pdev->dev, lch_count, sizeof(*od->lch_map), + GFP_KERNEL); if (!od->lch_map) return -ENOMEM; -- GitLab From 199c89fd32b2a127f12d5614a701e275f7f5a10f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 08:56:23 -0700 Subject: [PATCH 0537/1442] virtio_blk: avoid DMA to stack for the sense buffer commit a14d749fcebe97ddf6af6db3d1f6ece85c9ddcb9 upstream. Most users of BLOCK_PC requests allocate the sense buffer on the stack, so to avoid DMA to the stack copy them to a field in the heap allocated virtblk_req structure. Without that any attempt at SCSI passthrough I/O, including the SG_IO ioctl from userspace will crash the kernel. Note that this includes running tools like hdparm even when the host does not have SCSI passthrough enabled. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/virtio_blk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5545a679abd8..3c3b8f601469 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -56,6 +56,7 @@ struct virtblk_req { struct virtio_blk_outhdr out_hdr; struct virtio_scsi_inhdr in_hdr; u8 status; + u8 sense[SCSI_SENSE_BUFFERSIZE]; struct scatterlist sg[]; }; @@ -102,7 +103,8 @@ static int __virtblk_add_req(struct virtqueue *vq, } if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { - sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); + memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); + sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE); sgs[num_out + num_in++] = &sense; sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); sgs[num_out + num_in++] = &inhdr; -- GitLab From 2d789bd1fc24788116ee0711208ef439a8db7ba5 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 6 Dec 2016 13:05:33 +0100 Subject: [PATCH 0538/1442] tty/serial: atmel: RS485 half duplex w/DMA: enable RX after TX is done commit b389f173aaa1204d6dc1f299082a162eb0491545 upstream. When using RS485 in half duplex, RX should be enabled when TX is finished, and stopped when TX starts. Before commit 0058f0871efe7b01c6 ("tty/serial: atmel: fix RS485 half duplex with DMA"), RX was not disabled in atmel_start_tx() if the DMA was used. So, collisions could happened. But disabling RX in atmel_start_tx() uncovered another bug: RX was enabled again in the wrong place (in atmel_tx_dma) instead of being enabled when TX is finished (in atmel_complete_tx_dma), so the transmission simply stopped. This bug was not triggered before commit 0058f0871efe7b01c6 ("tty/serial: atmel: fix RS485 half duplex with DMA") because RX was never disabled before. Moving atmel_start_rx() in atmel_complete_tx_dma() corrects the problem. Reported-by: Gil Weber Fixes: 0058f0871efe7b01c6 Tested-by: Gil Weber Signed-off-by: Richard Genoud Acked-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 168b10cad47b..11c0117af80b 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -798,6 +798,11 @@ static void atmel_complete_tx_dma(void *arg) */ if (!uart_circ_empty(xmit)) atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx); + else if ((port->rs485.flags & SER_RS485_ENABLED) && + !(port->rs485.flags & SER_RS485_RX_DURING_TX)) { + /* DMA done, stop TX, start RX for RS485 */ + atmel_start_rx(port); + } spin_unlock_irqrestore(&port->lock, flags); } @@ -900,12 +905,6 @@ static void atmel_tx_dma(struct uart_port *port) desc->callback = atmel_complete_tx_dma; desc->callback_param = atmel_port; atmel_port->cookie_tx = dmaengine_submit(desc); - - } else { - if (port->rs485.flags & SER_RS485_ENABLED) { - /* DMA done, stop TX, start RX for RS485 */ - atmel_start_rx(port); - } } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) -- GitLab From ba04d869974ed02c8ac1769a6693d925bd861f68 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Tue, 13 Dec 2016 17:27:56 +0100 Subject: [PATCH 0539/1442] tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx commit 89d8232411a85b9a6b12fd5da4d07d8a138a8e0c upstream. If we don't disable the transmitter in atmel_stop_tx, the DMA buffer continues to send data until it is emptied. This cause problems with the flow control (CTS is asserted and data are still sent). So, disabling the transmitter in atmel_stop_tx is a sane thing to do. Tested on at91sam9g35-cm(DMA) Tested for regressions on sama5d2-xplained(Fifo) and at91sam9g20ek(PDC) Signed-off-by: Richard Genoud Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 11c0117af80b..fabbe76203bb 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -481,6 +481,14 @@ static void atmel_stop_tx(struct uart_port *port) /* disable PDC transmit */ atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); } + + /* + * Disable the transmitter. + * This is mandatory when DMA is used, otherwise the DMA buffer + * is fully transmitted. + */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS); + /* Disable interrupts */ atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); @@ -513,6 +521,9 @@ static void atmel_start_tx(struct uart_port *port) /* Enable interrupts */ atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask); + + /* re-enable the transmitter */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); } /* -- GitLab From 0edcc47cdb7f2d80c1e7236f5f797b7443cb2142 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Mon, 9 Jan 2017 10:21:20 -0600 Subject: [PATCH 0540/1442] ibmvscsis: Fix srp_transfer_data fail return code commit 7c9d8d0c41b3e24473ac7648a7fc2d644ccf08ff upstream. If srp_transfer_data fails within ibmvscsis_write_pending, then the most likely scenario is that the client timed out the op and removed the TCE mapping. Thus it will loop forever retrying the op that is pretty much guaranteed to fail forever. A better return code would be EIO instead of EAGAIN. Reported-by: Steven Royer Tested-by: Steven Royer Signed-off-by: Bryant G. Ly Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 642b739ad0da..608140f16d98 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3702,7 +3702,7 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd) 1, 1); if (rc) { pr_err("srp_transfer_data() failed: %d\n", rc); - return -EAGAIN; + return -EIO; } /* * We now tell TCM to add this WRITE CDB directly into the TCM storage -- GitLab From 4a940d6503e1ac6c6ba235a8f4c9b4fd61934fee Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Mon, 12 Dec 2016 12:55:55 -0800 Subject: [PATCH 0541/1442] orinoco: Use shash instead of ahash for MIC calculations commit 570b90fa230b8021f51a67fab2245fe8df6fe37d upstream. Eric Biggers pointed out that the orinoco driver pointed scatterlists at the stack. Fix it by switching from ahash to shash. The result should be simpler, faster, and more correct. kvalo: cherry picked from commit 1fef293b8a9850cfa124a53c1d8878d355010403 as I accidentally applied this patch to wireless-drivers-next when I was supposed to apply this wireless-drivers Reported-by: Eric Biggers Signed-off-by: Andy Lutomirski Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intersil/orinoco/mic.c | 44 +++++++++++-------- drivers/net/wireless/intersil/orinoco/mic.h | 3 +- .../net/wireless/intersil/orinoco/orinoco.h | 4 +- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intersil/orinoco/mic.c b/drivers/net/wireless/intersil/orinoco/mic.c index bc7397d709d3..08bc7822f820 100644 --- a/drivers/net/wireless/intersil/orinoco/mic.c +++ b/drivers/net/wireless/intersil/orinoco/mic.c @@ -16,7 +16,7 @@ /********************************************************************/ int orinoco_mic_init(struct orinoco_private *priv) { - priv->tx_tfm_mic = crypto_alloc_ahash("michael_mic", 0, + priv->tx_tfm_mic = crypto_alloc_shash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_mic)) { printk(KERN_DEBUG "orinoco_mic_init: could not allocate " @@ -25,7 +25,7 @@ int orinoco_mic_init(struct orinoco_private *priv) return -ENOMEM; } - priv->rx_tfm_mic = crypto_alloc_ahash("michael_mic", 0, + priv->rx_tfm_mic = crypto_alloc_shash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_mic)) { printk(KERN_DEBUG "orinoco_mic_init: could not allocate " @@ -40,17 +40,16 @@ int orinoco_mic_init(struct orinoco_private *priv) void orinoco_mic_free(struct orinoco_private *priv) { if (priv->tx_tfm_mic) - crypto_free_ahash(priv->tx_tfm_mic); + crypto_free_shash(priv->tx_tfm_mic); if (priv->rx_tfm_mic) - crypto_free_ahash(priv->rx_tfm_mic); + crypto_free_shash(priv->rx_tfm_mic); } -int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, +int orinoco_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *da, u8 *sa, u8 priority, u8 *data, size_t data_len, u8 *mic) { - AHASH_REQUEST_ON_STACK(req, tfm_michael); - struct scatterlist sg[2]; + SHASH_DESC_ON_STACK(desc, tfm_michael); u8 hdr[ETH_HLEN + 2]; /* size of header + padding */ int err; @@ -67,18 +66,27 @@ int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, hdr[ETH_ALEN * 2 + 2] = 0; hdr[ETH_ALEN * 2 + 3] = 0; - /* Use scatter gather to MIC header and data in one go */ - sg_init_table(sg, 2); - sg_set_buf(&sg[0], hdr, sizeof(hdr)); - sg_set_buf(&sg[1], data, data_len); + desc->tfm = tfm_michael; + desc->flags = 0; - if (crypto_ahash_setkey(tfm_michael, key, MIC_KEYLEN)) - return -1; + err = crypto_shash_setkey(tfm_michael, key, MIC_KEYLEN); + if (err) + return err; + + err = crypto_shash_init(desc); + if (err) + return err; + + err = crypto_shash_update(desc, hdr, sizeof(hdr)); + if (err) + return err; + + err = crypto_shash_update(desc, data, data_len); + if (err) + return err; + + err = crypto_shash_final(desc, mic); + shash_desc_zero(desc); - ahash_request_set_tfm(req, tfm_michael); - ahash_request_set_callback(req, 0, NULL, NULL); - ahash_request_set_crypt(req, sg, mic, data_len + sizeof(hdr)); - err = crypto_ahash_digest(req); - ahash_request_zero(req); return err; } diff --git a/drivers/net/wireless/intersil/orinoco/mic.h b/drivers/net/wireless/intersil/orinoco/mic.h index ce731d05cc98..e8724e889219 100644 --- a/drivers/net/wireless/intersil/orinoco/mic.h +++ b/drivers/net/wireless/intersil/orinoco/mic.h @@ -6,6 +6,7 @@ #define _ORINOCO_MIC_H_ #include +#include #define MICHAEL_MIC_LEN 8 @@ -15,7 +16,7 @@ struct crypto_ahash; int orinoco_mic_init(struct orinoco_private *priv); void orinoco_mic_free(struct orinoco_private *priv); -int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, +int orinoco_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *da, u8 *sa, u8 priority, u8 *data, size_t data_len, u8 *mic); diff --git a/drivers/net/wireless/intersil/orinoco/orinoco.h b/drivers/net/wireless/intersil/orinoco/orinoco.h index 2f0c84b1c440..5fa1c3e3713f 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco.h +++ b/drivers/net/wireless/intersil/orinoco/orinoco.h @@ -152,8 +152,8 @@ struct orinoco_private { u8 *wpa_ie; int wpa_ie_len; - struct crypto_ahash *rx_tfm_mic; - struct crypto_ahash *tx_tfm_mic; + struct crypto_shash *rx_tfm_mic; + struct crypto_shash *tx_tfm_mic; unsigned int wpa_enabled:1; unsigned int tkip_cm_active:1; -- GitLab From ef8ee4495171489ae456c2a6304709cfb5d58881 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 6 Jan 2017 02:14:16 +0900 Subject: [PATCH 0542/1442] sysrq: attach sysrq handler correctly for 32-bit kernel commit 802c03881f29844af0252b6e22be5d2f65f93fd0 upstream. The sysrq input handler should be attached to the input device which has a left alt key. On 32-bit kernels, some input devices which has a left alt key cannot attach sysrq handler. Because the keybit bitmap in struct input_device_id for sysrq is not correctly initialized. KEY_LEFTALT is 56 which is greater than BITS_PER_LONG on 32-bit kernels. I found this problem when using a matrix keypad device which defines a KEY_LEFTALT (56) but doesn't have a KEY_O (24 == 56%32). Cc: Jiri Slaby Signed-off-by: Akinobu Mita Acked-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 52bbd27e93ae..701c085bb19b 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -946,8 +946,8 @@ static const struct input_device_id sysrq_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT_MASK(EV_KEY) }, - .keybit = { BIT_MASK(KEY_LEFTALT) }, + .evbit = { [BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_LEFTALT)] = BIT_MASK(KEY_LEFTALT) }, }, { }, }; -- GitLab From 86820a103f517f8d51a09ceacac30bde355f4e89 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 16:56:49 +0800 Subject: [PATCH 0543/1442] extcon: return error code on failure commit 5b11ebedd6a8bb4271b796e498cd15c0fe1133b6 upstream. Function get_zeroed_page() returns a NULL pointer if there is no enough memory. In function extcon_sync(), it returns 0 if the call to get_zeroed_page() fails. The return value 0 indicates success in the context, which is incosistent with the execution status. This patch fixes the bug by returning -ENOMEM. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188611 Signed-off-by: Pan Bian Fixes: a580982f0836e Acked-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- drivers/extcon/extcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 78298460d168..7c1e3a7b14e0 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -453,7 +453,7 @@ int extcon_sync(struct extcon_dev *edev, unsigned int id) dev_err(&edev->dev, "out of memory in extcon_set_state\n"); kobject_uevent(&edev->dev.kobj, KOBJ_CHANGE); - return 0; + return -ENOMEM; } length = name_show(&edev->dev, NULL, prop_buf); -- GitLab From eca02f01be96d2ced5b2314dc4462c32e4ad0366 Mon Sep 17 00:00:00 2001 From: Daniel Jedrychowski Date: Mon, 12 Dec 2016 09:18:28 +1100 Subject: [PATCH 0544/1442] Clearing FIFOs in RS485 emulation mode causes subsequent transmits to break commit 2bed8a8e70729f996af92042d3ad0f11870acc1f upstream. When in RS485 emulation mode, __do_stop_tx_rs485() calls serial8250_clear_fifos(). This not only clears the FIFOs, but also sets all bits in their control register (UART_FCR) to 0. One of the effects of this is the disabling of the FIFOs, which turns them into single-byte holding registers. The rest of the driver doesn't know this, which results in the lions share of characters passed into a write call to be dropped. (I can supply logic analyzer screenshots if necessary) This fix replaces the serial8250_clear_fifos() call to serial8250_clear_and_reinit_fifos() - this prevents the "dropped characters" issue from manifesting again while retaining the requirement of clearing the RX FIFO after transmission if the SER_RS485_RX_DURING_TX flag is disabled. Signed-off-by: Daniel Jedrychowski Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 1731b98d2471..080d5a59d0a7 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1411,7 +1411,7 @@ static void __do_stop_tx_rs485(struct uart_8250_port *p) * Enable previously disabled RX interrupts. */ if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) { - serial8250_clear_fifos(p); + serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; serial_port_out(&p->port, UART_IER, p->ier); -- GitLab From 00cf64fbaa1e99d0420f2934f301c671ba298342 Mon Sep 17 00:00:00 2001 From: Zhou Chengming Date: Fri, 6 Jan 2017 09:32:32 +0800 Subject: [PATCH 0545/1442] sysctl: Drop reference added by grab_header in proc_sys_readdir commit 93362fa47fe98b62e4a34ab408c4a418432e7939 upstream. Fixes CVE-2016-9191, proc_sys_readdir doesn't drop reference added by grab_header when return from !dir_emit_dots path. It can cause any path called unregister_sysctl_table will wait forever. The calltrace of CVE-2016-9191: [ 5535.960522] Call Trace: [ 5535.963265] [] schedule+0x3f/0xa0 [ 5535.968817] [] schedule_timeout+0x3db/0x6f0 [ 5535.975346] [] ? wait_for_completion+0x45/0x130 [ 5535.982256] [] wait_for_completion+0xc3/0x130 [ 5535.988972] [] ? wake_up_q+0x80/0x80 [ 5535.994804] [] drop_sysctl_table+0xc4/0xe0 [ 5536.001227] [] drop_sysctl_table+0x77/0xe0 [ 5536.007648] [] unregister_sysctl_table+0x4d/0xa0 [ 5536.014654] [] unregister_sysctl_table+0x7f/0xa0 [ 5536.021657] [] unregister_sched_domain_sysctl+0x15/0x40 [ 5536.029344] [] partition_sched_domains+0x44/0x450 [ 5536.036447] [] ? __mutex_unlock_slowpath+0x111/0x1f0 [ 5536.043844] [] rebuild_sched_domains_locked+0x64/0xb0 [ 5536.051336] [] update_flag+0x11d/0x210 [ 5536.057373] [] ? mutex_lock_nested+0x2df/0x450 [ 5536.064186] [] ? cpuset_css_offline+0x1b/0x60 [ 5536.070899] [] ? trace_hardirqs_on+0xd/0x10 [ 5536.077420] [] ? mutex_lock_nested+0x2df/0x450 [ 5536.084234] [] ? css_killed_work_fn+0x25/0x220 [ 5536.091049] [] cpuset_css_offline+0x35/0x60 [ 5536.097571] [] css_killed_work_fn+0x5c/0x220 [ 5536.104207] [] process_one_work+0x1df/0x710 [ 5536.110736] [] ? process_one_work+0x160/0x710 [ 5536.117461] [] worker_thread+0x12b/0x4a0 [ 5536.123697] [] ? process_one_work+0x710/0x710 [ 5536.130426] [] kthread+0xfe/0x120 [ 5536.135991] [] ret_from_fork+0x1f/0x40 [ 5536.142041] [] ? kthread_create_on_node+0x230/0x230 One cgroup maintainer mentioned that "cgroup is trying to offline a cpuset css, which takes place under cgroup_mutex. The offlining ends up trying to drain active usages of a sysctl table which apprently is not happening." The real reason is that proc_sys_readdir doesn't drop reference added by grab_header when return from !dir_emit_dots path. So this cpuset offline path will wait here forever. See here for details: http://www.openwall.com/lists/oss-security/2016/11/04/13 Fixes: f0c3b5093add ("[readdir] convert procfs") Reported-by: CAI Qian Tested-by: Yang Shukui Signed-off-by: Zhou Chengming Acked-by: Al Viro Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 55313d994895..d4e37acd4821 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -709,7 +709,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) ctl_dir = container_of(head, struct ctl_dir, header); if (!dir_emit_dots(file, ctx)) - return 0; + goto out; pos = 2; @@ -719,6 +719,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) break; } } +out: sysctl_head_finish(head); return 0; } -- GitLab From 9297e0c189f59f42e5d39aee901d54a4493a60e2 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 10 Jan 2017 17:10:34 +0100 Subject: [PATCH 0546/1442] net/af_iucv: don't use paged skbs for TX on HiperSockets commit dc5367bcc556e97555fc94a32cd1aadbebdff47e upstream. With commit e53743994e21 ("af_iucv: use paged SKBs for big outbound messages"), we transmit paged skbs for both of AF_IUCV's transport modes (IUCV or HiperSockets). The qeth driver for Layer 3 HiperSockets currently doesn't support NETIF_F_SG, so these skbs would just be linearized again by the stack. Avoid that overhead by using paged skbs only for IUCV transport. cc stable, since this also circumvents a significant skb leak when sending large messages (where the skb then needs to be linearized). Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Fixes: e53743994e21 ("af_iucv: use paged SKBs for big outbound messages") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/iucv/af_iucv.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 02b45a8e8b35..91cbbf1c3f82 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1036,7 +1036,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); - size_t headroom, linear; + size_t headroom = 0; + size_t linear; struct sk_buff *skb; struct iucv_message txmsg = {0}; struct cmsghdr *cmsg; @@ -1114,18 +1115,20 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, * this is fine for SOCK_SEQPACKET (unless we want to support * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ - headroom = (iucv->transport == AF_IUCV_TRANS_HIPER) - ? sizeof(struct af_iucv_trans_hdr) + ETH_HLEN : 0; - if (headroom + len < PAGE_SIZE) { + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; linear = len; } else { - /* In nonlinear "classic" iucv skb, - * reserve space for iucv_array - */ - if (iucv->transport != AF_IUCV_TRANS_HIPER) - headroom += sizeof(struct iucv_array) * - (MAX_SKB_FRAGS + 1); - linear = PAGE_SIZE - headroom; + if (len < PAGE_SIZE) { + linear = len; + } else { + /* In nonlinear "classic" iucv skb, + * reserve space for iucv_array + */ + headroom = sizeof(struct iucv_array) * + (MAX_SKB_FRAGS + 1); + linear = PAGE_SIZE - headroom; + } } skb = sock_alloc_send_pskb(sk, headroom + linear, len - linear, noblock, &err, 0); -- GitLab From 6d15a83af98200f0ebdd36ac1d1cda743e76ad90 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 16 Jan 2017 18:59:46 +0200 Subject: [PATCH 0547/1442] drm/i915/gen9: Fix PCODE polling timeout in stable backport The backport of 2c7d0602c - "Fix PCODE polling during CDCLK change notification" to the 4.9 stable tree used an incorrect timeout value. Fix this up so the backport matches the upstream commit. Reported-by: Thomas Backlund Signed-off-by: Imre Deak Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 985cb31f4b44..22313980789b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8015,14 +8015,14 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, * worst case) _and_ PCODE was busy for some reason even after a * (queued) request and @timeout_base_ms delay. As a workaround retry * the poll with preemption disabled to maximize the number of - * requests. Increase the timeout from @timeout_base_ms to 50ms to + * requests. Increase the timeout from @timeout_base_ms to 10ms to * account for interrupts that could reduce the number of these * requests. */ DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); - ret = wait_for_atomic(COND, 50); + ret = wait_for_atomic(COND, 10); preempt_enable(); out: -- GitLab From 5375b71f3b1784c430ca7c58e6aae6bba8ef222c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 3 Jan 2017 01:14:27 +0200 Subject: [PATCH 0548/1442] drm: Clean up planes in atomic commit helper failure path commit aebe55c2d4b998741c0847ace1b4af47d73c763b upstream. If waiting for fences fails for blocking commits, planes must be cleaned up before returning. Fixes: f6ce410a59a4 ("drm/fence: allow fence waiting to be interrupted by userspace") Signed-off-by: Laurent Pinchart Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170102231427.7192-1-laurent.pinchart@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic_helper.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 21f992605541..338766c64c99 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1253,8 +1253,10 @@ int drm_atomic_helper_commit(struct drm_device *dev, if (!nonblock) { ret = drm_atomic_helper_wait_for_fences(dev, state, true); - if (ret) + if (ret) { + drm_atomic_helper_cleanup_planes(dev, state); return ret; + } } /* -- GitLab From 7cd7aea3dc78c5f592516159c719b8e3c7540449 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Jan 2017 12:15:52 -0500 Subject: [PATCH 0549/1442] drm/radeon: update smc firmware selection for SI commit 6458bd4dfd9414cba5804eb9907fe2a824278c34 upstream. Use the appropriate smc firmware for each chip revision. Using the wrong one can cause stability issues. Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si.c | 60 +++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 125c7e82c3d1..877af4a5ef68 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -50,7 +50,6 @@ MODULE_FIRMWARE("radeon/tahiti_ce.bin"); MODULE_FIRMWARE("radeon/tahiti_mc.bin"); MODULE_FIRMWARE("radeon/tahiti_rlc.bin"); MODULE_FIRMWARE("radeon/tahiti_smc.bin"); -MODULE_FIRMWARE("radeon/tahiti_k_smc.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_me.bin"); @@ -1657,9 +1656,6 @@ static int si_init_microcode(struct radeon_device *rdev) switch (rdev->family) { case CHIP_TAHITI: chip_name = "TAHITI"; - /* XXX: figure out which Tahitis need the new ucode */ - if (0) - new_smc = true; new_chip_name = "tahiti"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; me_req_size = SI_PM4_UCODE_SIZE * 4; @@ -1671,12 +1667,9 @@ static int si_init_microcode(struct radeon_device *rdev) break; case CHIP_PITCAIRN: chip_name = "PITCAIRN"; - if ((rdev->pdev->revision == 0x81) || - (rdev->pdev->device == 0x6810) || - (rdev->pdev->device == 0x6811) || - (rdev->pdev->device == 0x6816) || - (rdev->pdev->device == 0x6817) || - (rdev->pdev->device == 0x6806)) + if ((rdev->pdev->revision == 0x81) && + ((rdev->pdev->device == 0x6810) || + (rdev->pdev->device == 0x6811))) new_smc = true; new_chip_name = "pitcairn"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; @@ -1689,15 +1682,15 @@ static int si_init_microcode(struct radeon_device *rdev) break; case CHIP_VERDE: chip_name = "VERDE"; - if ((rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->revision == 0x87) || - (rdev->pdev->device == 0x6820) || - (rdev->pdev->device == 0x6821) || - (rdev->pdev->device == 0x6822) || - (rdev->pdev->device == 0x6823) || - (rdev->pdev->device == 0x682A) || - (rdev->pdev->device == 0x682B)) + if (((rdev->pdev->device == 0x6820) && + ((rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83))) || + ((rdev->pdev->device == 0x6821) && + ((rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87))) || + ((rdev->pdev->revision == 0x87) && + ((rdev->pdev->device == 0x6823) || + (rdev->pdev->device == 0x682b)))) new_smc = true; new_chip_name = "verde"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; @@ -1710,13 +1703,13 @@ static int si_init_microcode(struct radeon_device *rdev) break; case CHIP_OLAND: chip_name = "OLAND"; - if ((rdev->pdev->revision == 0xC7) || - (rdev->pdev->revision == 0x80) || - (rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->revision == 0x87) || - (rdev->pdev->device == 0x6604) || - (rdev->pdev->device == 0x6605)) + if (((rdev->pdev->revision == 0x81) && + ((rdev->pdev->device == 0x6600) || + (rdev->pdev->device == 0x6604) || + (rdev->pdev->device == 0x6605) || + (rdev->pdev->device == 0x6610))) || + ((rdev->pdev->revision == 0x83) && + (rdev->pdev->device == 0x6610))) new_smc = true; new_chip_name = "oland"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; @@ -1728,12 +1721,15 @@ static int si_init_microcode(struct radeon_device *rdev) break; case CHIP_HAINAN: chip_name = "HAINAN"; - if ((rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->revision == 0xC3) || - (rdev->pdev->device == 0x6664) || - (rdev->pdev->device == 0x6665) || - (rdev->pdev->device == 0x6667)) + if (((rdev->pdev->revision == 0x81) && + (rdev->pdev->device == 0x6660)) || + ((rdev->pdev->revision == 0x83) && + ((rdev->pdev->device == 0x6660) || + (rdev->pdev->device == 0x6663) || + (rdev->pdev->device == 0x6665) || + (rdev->pdev->device == 0x6667))) || + ((rdev->pdev->revision == 0xc3) && + (rdev->pdev->device == 0x6665))) new_smc = true; new_chip_name = "hainan"; pfp_req_size = SI_PFP_UCODE_SIZE * 4; -- GitLab From 0c37b07002d5f619ec6d691bb6f6c2a2cab78d5b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Jan 2017 12:39:01 -0500 Subject: [PATCH 0550/1442] drm/radeon: drop verde dpm quirks commit 8a08403bcb39f5d0e733bcf59a8a74f16b538f6e upstream. fixes: https://bugs.freedesktop.org/show_bug.cgi?id=98897 https://bugs.launchpad.net/bugs/1651981 Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher Cc: Adrian Fiergolski Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 8b5e697f2549..13ba73fd9b68 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3008,19 +3008,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6817) || (rdev->pdev->device == 0x6806)) max_mclk = 120000; - } else if (rdev->family == CHIP_VERDE) { - if ((rdev->pdev->revision == 0x81) || - (rdev->pdev->revision == 0x83) || - (rdev->pdev->revision == 0x87) || - (rdev->pdev->device == 0x6820) || - (rdev->pdev->device == 0x6821) || - (rdev->pdev->device == 0x6822) || - (rdev->pdev->device == 0x6823) || - (rdev->pdev->device == 0x682A) || - (rdev->pdev->device == 0x682B)) { - max_sclk = 75000; - max_mclk = 80000; - } } else if (rdev->family == CHIP_OLAND) { if ((rdev->pdev->revision == 0xC7) || (rdev->pdev->revision == 0x80) || -- GitLab From d52e714b39cecab78c0ad83e60c81f3a90a42fef Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Thu, 15 Dec 2016 13:43:59 +0800 Subject: [PATCH 0551/1442] drm/amdgpu: update si kicker smc firmware commit 5165484b02f2cbedb5bf3a41ff5e8ae16069016c upstream. Use the appropriate smc firmware for each chip revision. Using the wrong one can cause stability issues. Acked-by: Edward O'Callaghan Signed-off-by: Flora Cui Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 57 ++++++++++++++--------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 6d2ea76f4eb6..0590509da00c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -56,7 +56,6 @@ #define BIOS_SCRATCH_4 0x5cd MODULE_FIRMWARE("radeon/tahiti_smc.bin"); -MODULE_FIRMWARE("radeon/tahiti_k_smc.bin"); MODULE_FIRMWARE("radeon/pitcairn_smc.bin"); MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin"); MODULE_FIRMWARE("radeon/verde_smc.bin"); @@ -7685,49 +7684,49 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev) chip_name = "tahiti"; break; case CHIP_PITCAIRN: - if ((adev->pdev->revision == 0x81) || - (adev->pdev->device == 0x6810) || - (adev->pdev->device == 0x6811) || - (adev->pdev->device == 0x6816) || - (adev->pdev->device == 0x6817) || - (adev->pdev->device == 0x6806)) + if ((adev->pdev->revision == 0x81) && + ((adev->pdev->device == 0x6810) || + (adev->pdev->device == 0x6811))) chip_name = "pitcairn_k"; else chip_name = "pitcairn"; break; case CHIP_VERDE: - if ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87) || - (adev->pdev->device == 0x6820) || - (adev->pdev->device == 0x6821) || - (adev->pdev->device == 0x6822) || - (adev->pdev->device == 0x6823) || - (adev->pdev->device == 0x682A) || - (adev->pdev->device == 0x682B)) + if (((adev->pdev->device == 0x6820) && + ((adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0x83))) || + ((adev->pdev->device == 0x6821) && + ((adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87))) || + ((adev->pdev->revision == 0x87) && + ((adev->pdev->device == 0x6823) || + (adev->pdev->device == 0x682b)))) chip_name = "verde_k"; else chip_name = "verde"; break; case CHIP_OLAND: - if ((adev->pdev->revision == 0xC7) || - (adev->pdev->revision == 0x80) || - (adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87) || - (adev->pdev->device == 0x6604) || - (adev->pdev->device == 0x6605)) + if (((adev->pdev->revision == 0x81) && + ((adev->pdev->device == 0x6600) || + (adev->pdev->device == 0x6604) || + (adev->pdev->device == 0x6605) || + (adev->pdev->device == 0x6610))) || + ((adev->pdev->revision == 0x83) && + (adev->pdev->device == 0x6610))) chip_name = "oland_k"; else chip_name = "oland"; break; case CHIP_HAINAN: - if ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0xC3) || - (adev->pdev->device == 0x6664) || - (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)) + if (((adev->pdev->revision == 0x81) && + (adev->pdev->device == 0x6660)) || + ((adev->pdev->revision == 0x83) && + ((adev->pdev->device == 0x6660) || + (adev->pdev->device == 0x6663) || + (adev->pdev->device == 0x6665) || + (adev->pdev->device == 0x6667))) || + ((adev->pdev->revision == 0xc3) && + (adev->pdev->device == 0x6665))) chip_name = "hainan_k"; else chip_name = "hainan"; -- GitLab From 12a26bd342b5828b87e098291d17f9a188a0e7e6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Jan 2017 13:02:37 -0500 Subject: [PATCH 0552/1442] drm/amdgpu: drop verde dpm quirks commit 7192c54a68013f6058b1bb505645fcd07015191c upstream. Port of radeon change to amdgpu. Acked-by: Edward O'Callaghan Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 0590509da00c..b447a01ab21a 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3485,19 +3485,6 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6817) || (adev->pdev->device == 0x6806)) max_mclk = 120000; - } else if (adev->asic_type == CHIP_VERDE) { - if ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87) || - (adev->pdev->device == 0x6820) || - (adev->pdev->device == 0x6821) || - (adev->pdev->device == 0x6822) || - (adev->pdev->device == 0x6823) || - (adev->pdev->device == 0x682A) || - (adev->pdev->device == 0x682B)) { - max_sclk = 75000; - max_mclk = 80000; - } } else if (adev->asic_type == CHIP_OLAND) { if ((adev->pdev->revision == 0xC7) || (adev->pdev->revision == 0x80) || -- GitLab From 2f946414a080e8f516d267a6722f38104699efd1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Jan 2017 19:15:12 +0100 Subject: [PATCH 0553/1442] USB: serial: ch341: fix modem-control and B0 handling commit 030ee7ae52a46a2be52ccc8242c4a330aba8d38e upstream. The modem-control signals are managed by the tty-layer during open and should not be asserted prematurely when set_termios is called from driver open. Also make sure that the signals are asserted only when changing speed from B0. Fixes: 664d5df92e88 ("USB: usb-serial ch341: support for DTR/RTS/CTS") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 22b2c464b468..e98590aab633 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -362,24 +362,24 @@ static void ch341_set_termios(struct tty_struct *tty, baud_rate = tty_get_baud_rate(tty); if (baud_rate) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); - spin_unlock_irqrestore(&priv->lock, flags); priv->baud_rate = baud_rate; ch341_set_baudrate(port->serial->dev, priv); - } else { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS); - spin_unlock_irqrestore(&priv->lock, flags); } - ch341_set_handshake(port->serial->dev, priv->line_control); - /* Unimplemented: * (cflag & CSIZE) : data bits [5, 8] * (cflag & PARENB) : parity {NONE, EVEN, ODD} * (cflag & CSTOPB) : stop bits [1, 2] */ + + spin_lock_irqsave(&priv->lock, flags); + if (C_BAUD(tty) == B0) + priv->line_control &= ~(CH341_BIT_DTR | CH341_BIT_RTS); + else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) + priv->line_control |= (CH341_BIT_DTR | CH341_BIT_RTS); + spin_unlock_irqrestore(&priv->lock, flags); + + ch341_set_handshake(port->serial->dev, priv->line_control); } static void ch341_break_ctl(struct tty_struct *tty, int break_state) -- GitLab From ce31072b43426e3bf45bc490e93504f5746cb9b5 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 10 Jan 2017 22:33:39 +0200 Subject: [PATCH 0554/1442] net/mlx5: Only cancel recovery work when cleaning up device commit 5e44fca5047054f1762813751626b5245e0da022 upstream. Do not attempt to drain the health workqueue when unloading the device in the recovery flow, this can cause a deadlock when the recovery work tries to cancel itself with sync. Because the work is no longer unconditionally canceled when unloading, it must be explicitly canceled in the AER flow. fixes: 689a248df83b ("net/mlx5: Cancel recovery work in remove flow") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 92bd13ddc39d..0c9ef8729ca7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1158,7 +1158,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, { int err = 0; - mlx5_drain_health_wq(dev); + if (cleanup) + mlx5_drain_health_wq(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { @@ -1320,9 +1321,10 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state */ + /* In case of kernel call save the pci state and drain the health wq */ if (state) { pci_save_state(pdev); + mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } -- GitLab From 64e236812197178a0ffae4ac76394730c392ccd9 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Wed, 11 Jan 2017 10:11:44 +0100 Subject: [PATCH 0555/1442] i2c: piix4: Avoid race conditions with IMC commit 701dc207bf551d9fe6defa36e84a911e880398c3 upstream. On AMD's SB800 and upwards, the SMBus is shared with the Integrated Micro Controller (IMC). The platform provides a hardware semaphore to avoid race conditions among them. (Check page 288 of the SB800-Series Southbridges Register Reference Guide http://support.amd.com/TechDocs/45482.pdf) Without this patch, many access to the SMBus end with an invalid transaction or even with the bus stalled. Reported-by: Alexandre Desnoyers Signed-off-by: Ricardo Ribalda Delgado Reviewed-by: Andy Shevchenko : Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-piix4.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index c2268cdf38e8..e34d82e79b98 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -585,10 +585,29 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, u8 command, int size, union i2c_smbus_data *data) { struct i2c_piix4_adapdata *adapdata = i2c_get_adapdata(adap); + unsigned short piix4_smba = adapdata->smba; + int retries = MAX_TIMEOUT; + int smbslvcnt; u8 smba_en_lo; u8 port; int retval; + /* Request the SMBUS semaphore, avoid conflicts with the IMC */ + smbslvcnt = inb_p(SMBSLVCNT); + do { + outb_p(smbslvcnt | 0x10, SMBSLVCNT); + + /* Check the semaphore status */ + smbslvcnt = inb_p(SMBSLVCNT); + if (smbslvcnt & 0x10) + break; + + usleep_range(1000, 2000); + } while (--retries); + /* SMBus is still owned by the IMC, we give up */ + if (!retries) + return -EBUSY; + mutex_lock(&piix4_mutex_sb800); outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX); @@ -606,6 +625,9 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, mutex_unlock(&piix4_mutex_sb800); + /* Release the semaphore */ + outb_p(smbslvcnt | 0x20, SMBSLVCNT); + return retval; } -- GitLab From 9bae3c370a9ecc658d6a807efa007fdec915d0ed Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Wed, 28 Dec 2016 14:55:40 +0100 Subject: [PATCH 0556/1442] x86/cpu: Fix bootup crashes by sanitizing the argument of the 'clearcpuid=' command-line option commit dd853fd216d1485ed3045ff772079cc8689a9a4a upstream. A negative number can be specified in the cmdline which will be used as setup_clear_cpu_cap() argument. With that we can clear/set some bit in memory predceeding boot_cpu_data/cpu_caps_cleared which may cause kernel to misbehave. This patch adds lower bound check to setup_disablecpuid(). Boris Petkov reproduced a crash: [ 1.234575] BUG: unable to handle kernel paging request at ffffffff858bd540 [ 1.236535] IP: memcpy_erms+0x6/0x10 Signed-off-by: Lukasz Odzioba Acked-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andi.kleen@intel.com Cc: bp@alien8.de Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: slaoub@gmail.com Fixes: ac72e7888a61 ("x86: add generic clearcpuid=... option") Link: http://lkml.kernel.org/r/1482933340-11857-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dd62708c6a67..023c7bfa24df 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1275,7 +1275,7 @@ static __init int setup_disablecpuid(char *arg) { int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) + if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) setup_clear_cpu_cap(bit); else return 0; -- GitLab From 0aefd99f37f315a67dcb6487ce371f32dd1422a7 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 28 Dec 2016 22:13:15 -0200 Subject: [PATCH 0557/1442] nvme: apply DELAY_BEFORE_CHK_RDY quirk at probe time too commit b5a10c5f7532b7473776da87e67f8301bbc32693 upstream. Commit 54adc01055b7 ("nvme/quirk: Add a delay before checking for adapter readiness") introduced a quirk to adapters that cannot read the bit NVME_CSTS_RDY right after register NVME_REG_CC is set; these adapters need a delay or else the action of reading the bit NVME_CSTS_RDY could somehow corrupt adapter's registers state and it never recovers. When this quirk was added, we checked ctrl->tagset in order to avoid quirking in probe time, supposing we would never require such delay during probe. Well, it was too optimistic; we in fact need this quirk at probe time in some cases, like after a kexec. In some experiments, after abnormal shutdown of machine (aka power cord unplug), we booted into our bootloader in Power, which is a Linux kernel, and kexec'ed into another distro. If this kexec is too quick, we end up reaching the probe of NVMe adapter in that distro when adapter is in bad state (not fully initialized on our bootloader). What happens next is that nvme_wait_ready() is unable to complete, except if the quirk is enabled. So, this patch removes the original ctrl->tagset verification in order to enable the quirk even on probe time. Fixes: 54adc01055b7 ("nvme/quirk: Add a delay before checking for adapter readiness") Reported-by: Andrew Byrne Reported-by: Jaime A. H. Gomez Reported-by: Zachary D. Myers Signed-off-by: Guilherme G. Piccoli Acked-by: Jeffrey Lien Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 79e679d12f3b..da10b484bd25 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1122,12 +1122,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) if (ret) return ret; - /* Checking for ctrl->tagset is a trick to avoid sleeping on module - * load, since we only need the quirk on reset_controller. Notice - * that the HGST device needs this delay only in firmware activation - * procedure; unfortunately we have no (easy) way to verify this. - */ - if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) && ctrl->tagset) + if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); return nvme_wait_ready(ctrl, cap, false); -- GitLab From 003e3163fcc76bfdf27c27ef4f5c776bfa925067 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 20 Dec 2016 13:28:28 -0500 Subject: [PATCH 0558/1442] btrfs: fix locking when we put back a delayed ref that's too new commit d0280996437081dd12ed1e982ac8aeaa62835ec4 upstream. In __btrfs_run_delayed_refs, when we put back a delayed ref that's too new, we have already dropped the lock on locked_ref when we set ->processing = 0. This patch keeps the lock to cover that assignment. Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads) Signed-off-by: Jeff Mahoney Reviewed-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4607af38c72e..265d44127b1e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2537,11 +2537,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (ref && ref->seq && btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { spin_unlock(&locked_ref->lock); - btrfs_delayed_ref_unlock(locked_ref); spin_lock(&delayed_refs->lock); locked_ref->processing = 0; delayed_refs->num_heads_ready++; spin_unlock(&delayed_refs->lock); + btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; cond_resched(); count++; -- GitLab From 7a1dcd92fd396cd66f60b51fb102171d8dee460d Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 20 Dec 2016 13:28:27 -0500 Subject: [PATCH 0559/1442] btrfs: fix error handling when run_delayed_extent_op fails commit aa7c8da35d1905d80e840d075f07d26ec90144b5 upstream. In __btrfs_run_delayed_refs, the error path when run_delayed_extent_op fails sets locked_ref->processing = 0 but doesn't re-increment delayed_refs->num_heads_ready. As a result, we end up triggering the WARN_ON in btrfs_select_ref_head. Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads) Reported-by: Jon Nelson Signed-off-by: Jeff Mahoney Reviewed-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 265d44127b1e..5909ae8c6731 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2587,7 +2587,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, */ if (must_insert_reserved) locked_ref->must_insert_reserved = 1; + spin_lock(&delayed_refs->lock); locked_ref->processing = 0; + delayed_refs->num_heads_ready++; + spin_unlock(&delayed_refs->lock); btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); -- GitLab From a5209aa990423458d10849575bd768663d40bebb Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Dec 2016 15:08:16 +0100 Subject: [PATCH 0560/1442] pinctrl: meson: fix gpio request disabling other modes commit f24d311f92b516a8aadef5056424ccabb4068e7b upstream. The pinctrl_gpio_request is called with the "full" gpio number, already containing the base, then meson_pmx_request_gpio is then called with the final pin number. Remove the base addition when calling meson_pmx_disable_other_groups. Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs") CC: Beniamino Galvani Signed-off-by: Neil Armstrong Acked-by: Kevin Hilman Acked-by: Beniamino Galvani Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 57122eda155a..9443c9d408c6 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -212,7 +212,7 @@ static int meson_pmx_request_gpio(struct pinctrl_dev *pcdev, { struct meson_pinctrl *pc = pinctrl_dev_get_drvdata(pcdev); - meson_pmx_disable_other_groups(pc, range->pin_base + offset, -1); + meson_pmx_disable_other_groups(pc, offset, -1); return 0; } -- GitLab From 633b570376991e0447c63d925990ecc02896b25e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 28 Oct 2016 14:37:02 +0000 Subject: [PATCH 0561/1442] NFS: fix typo in parameter description commit f36ab161bebe464d33b998294eff29b17a9c8918 upstream. Fix typo in parameter description. Fixes: 5405fc44c337 ("NFSv4.x: Add kernel parameter to control the callback server") Signed-off-by: Wei Yongjun Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 001796bcd6c8..ddce94ce8142 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2904,7 +2904,7 @@ module_param(max_session_slots, ushort, 0644); MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " "requests the client will negotiate"); module_param(max_session_cb_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of parallel NFSv4.1 " +MODULE_PARM_DESC(max_session_cb_slots, "Maximum number of parallel NFSv4.1 " "callbacks the client will process for a given server"); module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, -- GitLab From 4c4d4bec6c904fc050847f977b13052be5745ae1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Nov 2016 15:21:30 -0500 Subject: [PATCH 0562/1442] pNFS: Fix race in pnfs_wait_on_layoutreturn commit ee284e35d8c71bf5d4d807eaff6f67a17134b359 upstream. We must put the task to sleep while holding the inode->i_lock in order to ensure atomicity with the test for NFS_LAYOUT_RETURN. Fixes: 500d701f336b ("NFS41: make close wait for layoutreturn") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 31b107e196fd..415d7e69bc5e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1257,13 +1257,11 @@ bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) * i_lock */ spin_lock(&ino->i_lock); lo = nfsi->layout; - if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); sleep = true; + } spin_unlock(&ino->i_lock); - - if (sleep) - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - return sleep; } -- GitLab From e331f2f2b1fbb5157dfb4f0a977bc8c3737e5927 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Nov 2016 10:54:55 -0500 Subject: [PATCH 0563/1442] NFS: Fix a performance regression in readdir commit 79f687a3de9e3ba2518b4ea33f38ca6cbe9133eb upstream. Ben Coddington reports that commit 311324ad1713, by adding the function nfs_dir_mapping_need_revalidate() that checks page cache validity on each call to nfs_readdir() causes a performance regression when the directory is being modified. If the directory is changing while we're iterating through the directory, POSIX does not require us to invalidate the page cache unless the user calls rewinddir(). However, we still do want to ensure that we use readdirplus in order to avoid a load of stat() calls when the user is doing an 'ls -l' workload. The fix should be to invalidate the page cache immediately when we're setting the NFS_INO_ADVISE_RDPLUS bit. Reported-by: Benjamin Coddington Fixes: 311324ad1713 ("NFS: Be more aggressive in using readdirplus...") Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/dir.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5f1af4cd1a33..53e02b8bd9bd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -477,7 +477,7 @@ void nfs_force_use_readdirplus(struct inode *dir) { if (!list_empty(&NFS_I(dir)->open_files)) { nfs_advise_use_readdirplus(dir); - nfs_zap_mapping(dir, dir->i_mapping); + invalidate_mapping_pages(dir->i_mapping, 0, -1); } } @@ -886,17 +886,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) goto out; } -static bool nfs_dir_mapping_need_revalidate(struct inode *dir) -{ - struct nfs_inode *nfsi = NFS_I(dir); - - if (nfs_attribute_cache_expired(dir)) - return true; - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - return true; - return false; -} - /* The file offset position represents the dirent entry number. A last cookie cache takes care of the common case of reading the whole directory. @@ -928,7 +917,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; - if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) goto out; -- GitLab From 396b25173370fc1afa3bc91a315d5597c2b10734 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Dec 2016 11:19:31 +1100 Subject: [PATCH 0564/1442] NFSv4.1: nfs4_fl_prepare_ds must be careful about reporting success. commit cfd278c280f997cf2fe4662e0acab0fe465f637b upstream. Various places assume that if nfs4_fl_prepare_ds() turns a non-NULL 'ds', then ds->ds_clp will also be non-NULL. This is not necessasrily true in the case when the process received a fatal signal while nfs4_pnfs_ds_connect is waiting in nfs4_wait_ds_connect(). In that case ->ds_clp may not be set, and the devid may not recently have been marked unavailable. So add a test for ds_clp == NULL and return NULL in that case. Fixes: c23266d532b4 ("NFS4.1 Fix data server connection race") Signed-off-by: NeilBrown Acked-by: Olga Kornievskaia Acked-by: Adamson, Andy Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/filelayout/filelayoutdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 4946ef40ba87..85ef38f9765f 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -283,7 +283,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) s->nfs_client->cl_rpcclient->cl_auth->au_flavor); out_test_devid: - if (filelayout_test_devid_unavailable(devid)) + if (ret->ds_clp == NULL || + filelayout_test_devid_unavailable(devid)) ret = NULL; out: return ret; -- GitLab From 26eae206b78669ec4994d047a4b9418b1c588e35 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 17 Dec 2016 12:10:56 +0000 Subject: [PATCH 0565/1442] i2c: mux: pca954x: fix i2c mux selection caching commit 7f638c1cb0a1112dbe0b682a42db30521646686b upstream. smbus functions return -ve on error, 0 on success. However, __i2c_transfer() have a different return signature - -ve on error, or number of buffers transferred (which may be zero or greater.) The upshot of this is that the sense of the test is reversed when using the mux on a bus supporting the master_xfer method: we cache the value and never retry if we fail to transfer any buffers, but if we succeed, we clear the cached value. Fix this by making pca954x_reg_write() return a negative error code for all failure cases. Fixes: 463e8f845cbf ("i2c: mux: pca954x: retry updating the mux selection on failure") Acked-by: Peter Rosin Signed-off-by: Russell King Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/muxes/i2c-mux-pca954x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 8bc3d36d2837..9c4ac26c014e 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -151,6 +151,9 @@ static int pca954x_reg_write(struct i2c_adapter *adap, buf[0] = val; msg.buf = buf; ret = __i2c_transfer(adap, &msg, 1); + + if (ret >= 0 && ret != 1) + ret = -EREMOTEIO; } else { union i2c_smbus_data data; ret = adap->algo->smbus_xfer(adap, client->addr, @@ -179,7 +182,7 @@ static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) /* Only select the channel if its different from the last channel */ if (data->last_chan != regval) { ret = pca954x_reg_write(muxc->parent, client, regval); - data->last_chan = ret ? 0 : regval; + data->last_chan = ret < 0 ? 0 : regval; } return ret; -- GitLab From 3b376640445a68d3531caf39d8092331fd14b641 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 5 Dec 2016 18:27:38 +0200 Subject: [PATCH 0566/1442] drm/i915/gen9: Fix PCODE polling during SAGV disabling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dccf82ad1775f2b9c36ec85e25e39d88c7e86818 upstream. According to the previous patch, it's possible atm that we call intel_do_sagv_disable() only once during the 1ms period and time out if that call fails. As opposed to this the spec says that we need to keep retrying this request for a 1ms duration, so let's do this similarly to the CDCLK change notification request. v4-5: - Rebased on the reply_mask, reply change. v6: - Remove w/s change. (Lyude) - Rebased on the timeout_base argument change. Cc: Lyude Cc: Ville Syrjälä Cc: Chris Wilson Fixes: 656d1b89e5ff ("drm/i915/skl: Add support for the SAGV, fix underrun hangs") Signed-off-by: Imre Deak Reviewed-by: Lyude (v4) Link: http://patchwork.freedesktop.org/patch/msgid/1480955258-26311-2-git-send-email-imre.deak@intel.com (cherry picked from commit b3b8e99984a4eace91bc097e8f8cec71441cae16) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_pm.c | 34 +++++++++------------------------ 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 22313980789b..e559a45ff1f7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2955,24 +2955,10 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) return 0; } -static int -intel_do_sagv_disable(struct drm_i915_private *dev_priv) -{ - int ret; - uint32_t temp = GEN9_SAGV_DISABLE; - - ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, - &temp); - if (ret) - return ret; - else - return temp & GEN9_SAGV_IS_DISABLED; -} - int intel_disable_sagv(struct drm_i915_private *dev_priv) { - int ret, result; + int ret; if (!intel_has_sagv(dev_priv)) return 0; @@ -2984,25 +2970,23 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->rps.hw_lock); /* bspec says to keep retrying for at least 1 ms */ - ret = wait_for(result = intel_do_sagv_disable(dev_priv), 1); + ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, + GEN9_SAGV_DISABLE, + GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, + 1); mutex_unlock(&dev_priv->rps.hw_lock); - if (ret == -ETIMEDOUT) { - DRM_ERROR("Request to disable SAGV timed out\n"); - return -ETIMEDOUT; - } - /* * Some skl systems, pre-release machines in particular, * don't actually have an SAGV. */ - if (IS_SKYLAKE(dev_priv) && result == -ENXIO) { + if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; return 0; - } else if (result < 0) { - DRM_ERROR("Failed to disable the SAGV\n"); - return result; + } else if (ret < 0) { + DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); + return ret; } dev_priv->sagv_status = I915_SAGV_DISABLED; -- GitLab From 214417a0419b9e37bebf24a1e3c6d3dea8ed0b52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:13:39 +0200 Subject: [PATCH 0567/1442] drm: avoid uninitialized timestamp use in wait_vblank commit cff52e5fc4cfc978b7df898dc14a0492c7ef0ae8 upstream. gcc warns about the timestamp in drm_wait_vblank being possibly used without an initialization: drivers/gpu/drm/drm_irq.c: In function 'drm_crtc_send_vblank_event': drivers/gpu/drm/drm_irq.c:992:24: error: 'now.tv_usec' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/gpu/drm/drm_irq.c:1069:17: note: 'now.tv_usec' was declared here drivers/gpu/drm/drm_irq.c:991:23: error: 'now.tv_sec' may be used uninitialized in this function [-Werror=maybe-uninitialized] This can happen if drm_vblank_count_and_time() returns 0 in its error path. To sanitize the error case, I'm changing that function to return a zero timestamp when it fails. Fixes: e6ae8687a87b ("drm: idiot-proof vblank") Reviewed-by: David Herrmann Cc: Rob Clark Cc: Daniel Vetter Signed-off-by: Arnd Bergmann Reviewed-by: Mario Kleiner Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161017221355.1861551-6-arnd@arndb.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b969a64a1514..48a6167f5e7b 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -952,8 +952,10 @@ static u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe, u32 vblank_count; unsigned int seq; - if (WARN_ON(pipe >= dev->num_crtcs)) + if (WARN_ON(pipe >= dev->num_crtcs)) { + *vblanktime = (struct timeval) { 0 }; return 0; + } do { seq = read_seqbegin(&vblank->seqlock); -- GitLab From 699fbc4cb7d4ba30865f978750f0364cebabb31c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 24 Oct 2016 21:21:15 +0800 Subject: [PATCH 0568/1442] drm/panel: simple: Check against num_timings when setting preferred for timing commit 230c5b44233ff0543c0b5ccf4ff9400057010fbe upstream. In the loop on .timings, we should check .num_timings to see if it's the only mode specified, not .num_modes, which should be used with .modes. Fixes: cda553725c92 ("drm/panel: simple: Set appropriate mode type") Signed-off-by: Chen-Yu Tsai Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/panel/panel-simple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 113db3c4a633..27cb42467b20 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -120,7 +120,7 @@ static int panel_simple_get_fixed_modes(struct panel_simple *panel) mode->type |= DRM_MODE_TYPE_DRIVER; - if (panel->desc->num_modes == 1) + if (panel->desc->num_timings == 1) mode->type |= DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, mode); -- GitLab From 5bdb57418845ce068cd5272fbfc15d6ec6a94c7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 20 Dec 2016 17:39:02 +0200 Subject: [PATCH 0569/1442] drm/i915: Move the min_pixclk[] handling to the end of readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 00b2b7288299a8c73c0c37b531a075ba5c849e67 upstream. Trying to determine the pixel rate of the pipe can't be done until we know the clock, which means it can't be done until the encoder .get_config() hooks have been called. So let's move the min_pixclk[] stuff to the end of intel_modeset_readout_hw_state() when we actually have gathered all the required infromation. Cc: Maarten Lankhorst Cc: Mika Kahola Cc: Ander Conselvan de Oliveira Fixes: 565602d7501a ("drm/i915: Do not acquire crtc state to check clock during modeset, v4.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161220153902.15621-1-ville.syrjala@linux.intel.com Reviewed-by: Ander Conselvan de Oliveira Reviewed-by: Maarten Lankhorst (cherry picked from commit aca1ebf491518910df156f3dab6a66306bb52e28) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c9e83f39ec0a..869b29fe9ec4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16749,7 +16749,6 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = crtc->config; - int pixclk = 0; __drm_atomic_helper_crtc_destroy_state(&crtc_state->base); memset(crtc_state, 0, sizeof(*crtc_state)); @@ -16761,23 +16760,9 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc->base.enabled = crtc_state->base.enable; crtc->active = crtc_state->base.active; - if (crtc_state->base.active) { + if (crtc_state->base.active) dev_priv->active_crtcs |= 1 << crtc->pipe; - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc_state); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->base.adjusted_mode.crtc_clock; - else - WARN_ON(dev_priv->display.modeset_calc_cdclk); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixclk = DIV_ROUND_UP(pixclk * 100, 95); - } - - dev_priv->min_pixclk[crtc->pipe] = pixclk; - readout_plane_state(crtc); DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n", @@ -16851,6 +16836,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } for_each_intel_crtc(dev, crtc) { + int pixclk = 0; + crtc->base.hwmode = crtc->config->base.adjusted_mode; memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); @@ -16878,10 +16865,23 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) */ crtc->base.state->mode.private_flags = I915_MODE_FLAG_INHERITED; + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + pixclk = ilk_pipe_pixel_rate(crtc->config); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pixclk = crtc->config->base.adjusted_mode.crtc_clock; + else + WARN_ON(dev_priv->display.modeset_calc_cdclk); + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && crtc->config->ips_enabled) + pixclk = DIV_ROUND_UP(pixclk * 100, 95); + drm_calc_timestamping_constants(&crtc->base, &crtc->base.hwmode); update_scanline_offset(crtc); } + dev_priv->min_pixclk[crtc->pipe] = pixclk; + intel_pipe_config_sanity_check(dev_priv, crtc->config); } } -- GitLab From b4a69e3877ca10318a7a12296524e92a79a6eea3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Nov 2016 20:51:26 +0000 Subject: [PATCH 0570/1442] drm: Initialise drm_mm.head_node.allocated commit cc98e6ce6abe1c0103cbd7aff1ee586622a9361e upstream. commit 202b52b7fbf7 ("drm: Track drm_mm nodes with an interval tree") introduced a requirement that the special drm_mm.head_node was initialised and marked as not being allocated. It is a very special node that has no side but has a hole that represents the drm_mm address space, and holds the list of nodes. Since it is not a real node, it is not part of the node rbtree and we detect this as it being unallocated. This presumed that drm_mm_init() was initialising it to zero. It happens that i915 kzallocs its objects and so it was accidentally setting it, but for generic use we cannot make that assumption. [ 22.981519] general protection fault: 0000 [#1] SMP [ 22.981521] Modules linked in: test_drm_mm(+) ctr ccm arc4 rt2800usb rt2x00usb rt2800lib rt2x00lib crc_ccitt mac80211 cmac rfcomm bnep snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_intel dcdbas snd_hda_codec x86_pkg_temp_thermal intel_powerclamp btusb snd_hda_core coretemp crct10dif_pclmul cfg80211 btrtl btbcm btintel bluetooth crc32_pclmul ghash_clmulni_intel aesni_intel snd_pcm i2c_hid aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd snd_timer hid_multitouch snd joydev serio_raw lpc_ich mfd_core i2c_designware_platform i2c_designware_core 8250_dw binfmt_misc soundcore acpi_pad nls_iso8859_1 usbhid hid psmouse ahci libahci [last unloaded: test_drm_mm] [ 22.981544] CPU: 1 PID: 2088 Comm: drm_mm Tainted: G W 4.9.0-rc7+ #234 [ 22.981545] Hardware name: Dell Inc. XPS 13 9343/0310JH, BIOS A07 11/11/2015 [ 22.981546] task: ffff88020c971cc0 task.stack: ffffc90001728000 [ 22.981547] RIP: 0010:[] [] drm_mm_interval_tree_add_node+0xa0/0xd0 [ 22.981551] RSP: 0018:ffffc9000172ba98 EFLAGS: 00010202 [ 22.981552] RAX: 0f0000c69cf63d80 RBX: ffff88020be00000 RCX: ffff88020be00000 [ 22.981553] RDX: 0000000000000fff RSI: ffffc9000172bc48 RDI: ffffffff810ac4df [ 22.981553] RBP: ffffc9000172bb08 R08: ffffc9000172bc70 R09: 0000000000000fff [ 22.981554] R10: ffffffff810ac4d7 R11: 4dc04d8b4cffffe5 R12: 0000000000001000 [ 22.981555] R13: ffffc9000172bbd0 R14: ffffc9000172bbe0 R15: 0000000002000000 [ 22.981556] FS: 00007f80c9fab740(0000) GS:ffff88021f480000(0000) knlGS:0000000000000000 [ 22.981557] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 22.981558] CR2: 00007f80c9fd5000 CR3: 000000020c191000 CR4: 00000000003406e0 [ 22.981559] Stack: [ 22.981560] ffffffff81405d09 ffff88020be00000 ffffc9000172bbe0 000000000172bb08 [ 22.981562] ffffffffffffffff 0000000000000000 0000000000000000 0000000000000000 [ 22.981563] 0000000002000000 0000000002000000 ffffffffa02f3000 ffff88020be00000 [ 22.981565] Call Trace: [ 22.981568] [] ? drm_mm_insert_node_generic+0x229/0x310 [ 22.981570] [] ? 0xffffffffa02f3000 [ 22.981572] [] __subtest_insert_range.constprop.7+0xd1/0x5b0 [test_drm_mm] [ 22.981575] [] ? default_wake_function+0x12/0x20 [ 22.981576] [] ? __wake_up_common+0x55/0x90 [ 22.981578] [] ? sched_clock_cpu+0x72/0xa0 [ 22.981581] [] ? irq_work_queue+0xd/0x80 [ 22.981582] [] ? wake_up_klogd+0x34/0x40 [ 22.981584] [] ? console_unlock+0x4cd/0x530 [ 22.981585] [] ? vprintk_emit+0x2d7/0x490 [ 22.981587] [] ? vprintk_default+0x1f/0x30 [ 22.981589] [] ? printk+0x4d/0x4f [ 22.981590] [] ? 0xffffffffa02f3000 [ 22.981592] [] subtest_insert_range+0x15/0x80 [test_drm_mm] [ 22.981594] [] test_drm_mm_init+0x88/0x1000 [test_drm_mm] [ 22.981597] [] do_one_initcall+0x3d/0x150 [ 22.981600] [] ? kfree+0x13f/0x180 [ 22.981602] [] do_init_module+0x60/0x1f1 [ 22.981606] [] load_module+0x2228/0x2790 [ 22.981608] [] ? __symbol_put+0x40/0x40 [ 22.981612] [] ? kernel_read+0x41/0x60 [ 22.981614] [] SYSC_finit_module+0x96/0xd0 [ 22.981617] [] SyS_finit_module+0xe/0x10 [ 22.981620] [] entry_SYSCALL_64_fastpath+0x17/0x98 [ 22.981622] Code: c7 41 30 00 00 00 00 48 89 e5 48 89 3a 48 c7 c2 20 4e 40 81 e8 b2 a1 f0 ff 5d c3 48 8d 56 78 45 31 d2 48 89 d6 eb 25 48 8b 51 58 <48> 39 50 38 73 04 48 89 50 38 4c 8b 58 28 4c 39 59 48 48 8d 50 [ 22.981651] RIP [] drm_mm_interval_tree_add_node+0xa0/0xd0 [ 22.981655] RSP Testcase: igt/drm_mm Fixes: 202b52b7fbf7 ("drm: Track drm_mm nodes with an interval tree") Signed-off-by: Chris Wilson Cc: David Herrmann Cc: dri-devel@lists.freedesktop.org Cc: Daniel Vetter Cc: # v4.9-rc1+ Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161130205126.31106-1-chris@chris-wilson.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_mm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 11d44a1e0ab3..ee07bb4a57b7 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -839,6 +839,7 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size) /* Clever trick to avoid a special case in the free hole tracking. */ INIT_LIST_HEAD(&mm->head_node.node_list); + mm->head_node.allocated = 0; mm->head_node.hole_follows = 1; mm->head_node.scanned_block = 0; mm->head_node.scanned_prev_free = 0; -- GitLab From 0e38a0b7acbb8d046c7312ae8cb1e98951934226 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 3 Nov 2016 19:37:25 -0700 Subject: [PATCH 0571/1442] remoteproc: qcom_wcnss: Fix circular module dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6de1a507c46bf22ed97043495b9ab96e4d5c213b upstream. The tie between the main WCNSS driver and the IRIS driver causes a circular dependency between the two modules. Neither part makes sense to have on their own so lets merge them into one module. For the sake of picking up the clock and regulator resources described in the iris of_node we need an associated struct device. But, to keep the size of the patch down we continue to represent the IRIS part as its own platform_driver, within the same module, rather than setting up a dummy device. Fixes: aed361adca9f ("remoteproc: qcom: Introduce WCNSS peripheral image loader") Reported-by: Andreas Färber Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/Kconfig | 5 ----- drivers/remoteproc/Makefile | 5 +++-- drivers/remoteproc/qcom_wcnss.c | 25 +++++++++++++++++++++++-- drivers/remoteproc/qcom_wcnss.h | 2 ++ drivers/remoteproc/qcom_wcnss_iris.c | 8 +------- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index f396bfef5d42..5fcbefcb8636 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -91,17 +91,12 @@ config QCOM_Q6V5_PIL Say y here to support the Qualcomm Peripherial Image Loader for the Hexagon V5 based remote processors. -config QCOM_WCNSS_IRIS - tristate - depends on OF && ARCH_QCOM - config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM depends on QCOM_SMEM select QCOM_MDT_LOADER select QCOM_SCM - select QCOM_WCNSS_IRIS select REMOTEPROC help Say y here to support the Peripheral Image Loader for the Qualcomm diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile index 6dfb62ed643f..034b6f3563a7 100644 --- a/drivers/remoteproc/Makefile +++ b/drivers/remoteproc/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_WKUP_M3_RPROC) += wkup_m3_rproc.o obj-$(CONFIG_DA8XX_REMOTEPROC) += da8xx_remoteproc.o obj-$(CONFIG_QCOM_MDT_LOADER) += qcom_mdt_loader.o obj-$(CONFIG_QCOM_Q6V5_PIL) += qcom_q6v5_pil.o -obj-$(CONFIG_QCOM_WCNSS_IRIS) += qcom_wcnss_iris.o -obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss.o +obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss_pil.o +qcom_wcnss_pil-y += qcom_wcnss.o +qcom_wcnss_pil-y += qcom_wcnss_iris.o obj-$(CONFIG_ST_REMOTEPROC) += st_remoteproc.o diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index f5cedeaafba1..323b629474a6 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -143,7 +143,6 @@ void qcom_wcnss_assign_iris(struct qcom_wcnss *wcnss, mutex_unlock(&wcnss->iris_lock); } -EXPORT_SYMBOL_GPL(qcom_wcnss_assign_iris); static int wcnss_load(struct rproc *rproc, const struct firmware *fw) { @@ -619,6 +618,28 @@ static struct platform_driver wcnss_driver = { }, }; -module_platform_driver(wcnss_driver); +static int __init wcnss_init(void) +{ + int ret; + + ret = platform_driver_register(&wcnss_driver); + if (ret) + return ret; + + ret = platform_driver_register(&qcom_iris_driver); + if (ret) + platform_driver_unregister(&wcnss_driver); + + return ret; +} +module_init(wcnss_init); + +static void __exit wcnss_exit(void) +{ + platform_driver_unregister(&qcom_iris_driver); + platform_driver_unregister(&wcnss_driver); +} +module_exit(wcnss_exit); + MODULE_DESCRIPTION("Qualcomm Peripherial Image Loader for Wireless Subsystem"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/remoteproc/qcom_wcnss.h b/drivers/remoteproc/qcom_wcnss.h index 9dc4a9fe41e1..25fb7f62a457 100644 --- a/drivers/remoteproc/qcom_wcnss.h +++ b/drivers/remoteproc/qcom_wcnss.h @@ -4,6 +4,8 @@ struct qcom_iris; struct qcom_wcnss; +extern struct platform_driver qcom_iris_driver; + struct wcnss_vreg_info { const char * const name; int min_voltage; diff --git a/drivers/remoteproc/qcom_wcnss_iris.c b/drivers/remoteproc/qcom_wcnss_iris.c index f0ca24a8dd0b..05d6e175411a 100644 --- a/drivers/remoteproc/qcom_wcnss_iris.c +++ b/drivers/remoteproc/qcom_wcnss_iris.c @@ -94,14 +94,12 @@ int qcom_iris_enable(struct qcom_iris *iris) return ret; } -EXPORT_SYMBOL_GPL(qcom_iris_enable); void qcom_iris_disable(struct qcom_iris *iris) { clk_disable_unprepare(iris->xo_clk); regulator_bulk_disable(iris->num_vregs, iris->vregs); } -EXPORT_SYMBOL_GPL(qcom_iris_disable); static int qcom_iris_probe(struct platform_device *pdev) { @@ -174,7 +172,7 @@ static const struct of_device_id iris_of_match[] = { {} }; -static struct platform_driver wcnss_driver = { +struct platform_driver qcom_iris_driver = { .probe = qcom_iris_probe, .remove = qcom_iris_remove, .driver = { @@ -182,7 +180,3 @@ static struct platform_driver wcnss_driver = { .of_match_table = iris_of_match, }, }; - -module_platform_driver(wcnss_driver); -MODULE_DESCRIPTION("Qualcomm Wireless Subsystem Iris driver"); -MODULE_LICENSE("GPL v2"); -- GitLab From f607fd883654003e76db567ebb4c4dd9054892bf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 16:23:35 +0000 Subject: [PATCH 0572/1442] remoteproc: st: Fix error return code in st_rproc_probe() commit 1d701d3dd8caf6660ff33c3c23a115b4649c5cdb upstream. Fix to return a negative error code from the st_rproc_state() error handling case instead of 0, as done elsewhere in this function. Fixes: 63edb0310a5c ("remoteproc: Supply controller driver for ST's Remote Processors") Signed-off-by: Wei Yongjun Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/st_remoteproc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/remoteproc/st_remoteproc.c b/drivers/remoteproc/st_remoteproc.c index ae8963fcc8c8..da4e152e9733 100644 --- a/drivers/remoteproc/st_remoteproc.c +++ b/drivers/remoteproc/st_remoteproc.c @@ -245,8 +245,10 @@ static int st_rproc_probe(struct platform_device *pdev) goto free_rproc; enabled = st_rproc_state(pdev); - if (enabled < 0) + if (enabled < 0) { + ret = enabled; goto free_rproc; + } if (enabled) { atomic_inc(&rproc->power); -- GitLab From e5f33ef0a60141e8eb560962750d6942a830048f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 11 Nov 2016 16:55:03 +1100 Subject: [PATCH 0573/1442] powerpc/64: Simplify adaptation to new ISA v3.00 HPTE format commit 6b243fcfb5f1e16bcf732e6f86a63f8af5b59a9f upstream. This changes the way that we support the new ISA v3.00 HPTE format. Instead of adapting everything that uses HPTE values to handle either the old format or the new format, depending on which CPU we are on, we now convert explicitly between old and new formats if necessary in the low-level routines that actually access HPTEs in memory. This limits the amount of code that needs to know about the new format and makes the conversions explicit. This is OK because the old format contains all the information that is in the new format. This also fixes operation under a hypervisor, because the H_ENTER hypercall (and other hypercalls that deal with HPTEs) will continue to require the HPTE value to be supplied in the old format. At present the kernel will not boot in HPT mode on POWER9 under a hypervisor. This fixes and partially reverts commit 50de596de8be ("powerpc/mm/hash: Add support for Power9 Hash", 2016-04-29). Fixes: 50de596de8be ("powerpc/mm/hash: Add support for Power9 Hash") Signed-off-by: Paul Mackerras Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 47 +++++++++++++++---- arch/powerpc/mm/hash_native_64.c | 30 +++++++++--- arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- 4 files changed, 65 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index e407af2b7333..2e6a823fa502 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -70,7 +70,9 @@ #define HPTE_V_SSIZE_SHIFT 62 #define HPTE_V_AVPN_SHIFT 7 +#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff) #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) +#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80) #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) @@ -80,14 +82,16 @@ #define HPTE_V_VALID ASM_CONST(0x0000000000000001) /* - * ISA 3.0 have a different HPTE format. + * ISA 3.0 has a different HPTE format. */ #define HPTE_R_3_0_SSIZE_SHIFT 58 +#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT) #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) +#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) #define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) @@ -316,11 +320,42 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, */ v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); v <<= HPTE_V_AVPN_SHIFT; - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; return v; } +/* + * ISA v3.0 defines a new HPTE format, which differs from the old + * format in having smaller AVPN and ARPN fields, and the B field + * in the second dword instead of the first. + */ +static inline unsigned long hpte_old_to_new_v(unsigned long v) +{ + /* trim AVPN, drop B */ + return v & HPTE_V_COMMON_BITS; +} + +static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r) +{ + /* move B field from 1st to 2nd dword, trim ARPN */ + return (r & ~HPTE_R_3_0_SSIZE_MASK) | + (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT); +} + +static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r) +{ + /* insert B field */ + return (v & HPTE_V_COMMON_BITS) | + ((r & HPTE_R_3_0_SSIZE_MASK) << + (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT)); +} + +static inline unsigned long hpte_new_to_old_r(unsigned long r) +{ + /* clear out B field */ + return r & ~HPTE_R_3_0_SSIZE_MASK; +} + /* * This function sets the AVPN and L fields of the HPTE appropriately * using the base page size and actual page size. @@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, * aligned for the requested page size */ static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, - int actual_psize, int ssize) + int actual_psize) { - - if (cpu_has_feature(CPU_FTR_ARCH_300)) - pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT; - /* A 4K page needs no special encoding */ if (actual_psize == MMU_PAGE_4K) return pa & HPTE_R_RPN; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 83ddc0e171b0..ad9fd5245be2 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, return -1; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", i, hpte_v, hpte_r); } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_r = hpte_old_to_new_r(hpte_v, hpte_r); + hpte_v = hpte_old_to_new_v(hpte_v); + } + hptep->r = cpu_to_be64(hpte_r); /* Guarantee the second dword is visible before the valid bit */ eieio(); @@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, vpn, want_v & HPTE_V_AVPN, slot, newpp); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_lock_hpte(hptep); /* recheck with locks held */ hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))) { ret = -1; @@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + slot; hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) /* HPTE matches */ @@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, want_v = hpte_encode_avpn(vpn, bpsize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid, want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) @@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, /* Look at the 8 bit LP value */ unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_v = hpte_new_to_old_v(hpte_v, hpte_r); + hpte_r = hpte_new_to_old_r(hpte_r); + } if (!(hpte_v & HPTE_V_LARGE)) { size = MMU_PAGE_4K; a_size = MMU_PAGE_4K; @@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, a_size = hpte_page_sizes[lp] >> 4; } /* This works for all page sizes, and for 256M and 1T segments */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT; - else - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; - + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; shift = mmu_psize_defs[size].shift; avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); @@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local) want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, + be64_to_cpu(hptep->r)); if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index cb3c50328de8..cc2b281a3766 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, vflags &= ~HPTE_V_SECONDARY; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; spin_lock_irqsave(&ps3_htab_lock, flags); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index aa35245d8d6d..f2c98f6c1c9c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, hpte_group, vpn, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); -- GitLab From 89c728ed9237748b24456dc8502a6e1577b23105 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Tue, 8 Nov 2016 05:39:28 -0500 Subject: [PATCH 0574/1442] cpufreq: powernv: Disable preemption while checking CPU throttling state commit 8a10c06a20ec8097a68fd7a4a1c0e285095b4d2f upstream. With preemption turned on we can read incorrect throttling state while being switched to CPU on a different chip. BUG: using smp_processor_id() in preemptible [00000000] code: cat/7343 caller is .powernv_cpufreq_throttle_check+0x2c/0x710 CPU: 13 PID: 7343 Comm: cat Not tainted 4.8.0-rc5-dirty #1 Call Trace: [c0000007d25b75b0] [c000000000971378] .dump_stack+0xe4/0x150 (unreliable) [c0000007d25b7640] [c0000000005162e4] .check_preemption_disabled+0x134/0x150 [c0000007d25b76e0] [c0000000007b63ac] .powernv_cpufreq_throttle_check+0x2c/0x710 [c0000007d25b7790] [c0000000007b6d18] .powernv_cpufreq_target_index+0x288/0x360 [c0000007d25b7870] [c0000000007acee4] .__cpufreq_driver_target+0x394/0x8c0 [c0000007d25b7920] [c0000000007b22ac] .cpufreq_set+0x7c/0xd0 [c0000007d25b79b0] [c0000000007adf50] .store_scaling_setspeed+0x80/0xc0 [c0000007d25b7a40] [c0000000007ae270] .store+0xa0/0x100 [c0000007d25b7ae0] [c0000000003566e8] .sysfs_kf_write+0x88/0xb0 [c0000007d25b7b70] [c0000000003553b8] .kernfs_fop_write+0x178/0x260 [c0000007d25b7c10] [c0000000002ac3cc] .__vfs_write+0x3c/0x1c0 [c0000007d25b7cf0] [c0000000002ad584] .vfs_write+0xc4/0x230 [c0000007d25b7d90] [c0000000002aeef8] .SyS_write+0x58/0x100 [c0000007d25b7e30] [c00000000000bfec] system_call+0x38/0xfc Fixes: 09a972d16209 (cpufreq: powernv: Report cpu frequency throttling) Reviewed-by: Gautham R. Shenoy Signed-off-by: Denis Kirjanov Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/powernv-cpufreq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index d3ffde806629..a84724eabfb8 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -647,8 +647,14 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, if (unlikely(rebooting) && new_index != get_nominal_index()) return 0; - if (!throttled) + if (!throttled) { + /* we don't want to be preempted while + * checking if the CPU frequency has been throttled + */ + preempt_disable(); powernv_cpufreq_throttle_check(NULL); + preempt_enable(); + } cur_msec = jiffies_to_msecs(get_jiffies_64()); -- GitLab From 26991b28b47c4a730f08337de557fe4883c44cc2 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 10 Nov 2016 10:45:18 +0000 Subject: [PATCH 0575/1442] regulators: helpers: Fix handling of bypass_val_on in get_bypass_regmap commit 85b037442e3f0e84296ab1010fd6b057eee18496 upstream. The handling of bypass_val_on that was added in regulator_get_bypass_regmap is done unconditionally however several drivers don't define a value for bypass_val_on. This results in those drivers reporting bypass being enabled when it is not. In regulator_set_bypass_regmap we use bypass_mask if bypass_val_on is zero. This patch adds similar handling in regulator_get_bypass_regmap. Fixes: commit dd1a571daee7 ("regulator: helpers: Ensure bypass register field matches ON value") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/helpers.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index bcf38fd5106a..379cdacc05d8 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -454,13 +454,17 @@ EXPORT_SYMBOL_GPL(regulator_set_bypass_regmap); int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable) { unsigned int val; + unsigned int val_on = rdev->desc->bypass_val_on; int ret; ret = regmap_read(rdev->regmap, rdev->desc->bypass_reg, &val); if (ret != 0) return ret; - *enable = (val & rdev->desc->bypass_mask) == rdev->desc->bypass_val_on; + if (!val_on) + val_on = rdev->desc->bypass_mask; + + *enable = (val & rdev->desc->bypass_mask) == val_on; return 0; } -- GitLab From a4e73cc229d8c2e7f75016e3f9e36455fb3c4aca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Nov 2016 22:22:54 +0300 Subject: [PATCH 0576/1442] ACPI / CPPC: set an error code on probe error path commit 501634759d55a5b56967de6d9465acf02bbc3565 upstream. We should return -EINVAL (instead of 0) if get_cpu_device() fails. Fixes: 158c998ea44b (ACPI / CPPC: add sysfs support to compute delivered performance) Signed-off-by: Dan Carpenter Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index d0d0504b7c89..e0ea8f56d2bf 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -784,8 +784,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) /* Add per logical CPU nodes for reading its feedback counters. */ cpu_dev = get_cpu_device(pr->id); - if (!cpu_dev) + if (!cpu_dev) { + ret = -EINVAL; goto out_free; + } ret = kobject_init_and_add(&cpc_ptr->kobj, &cppc_ktype, &cpu_dev->kobj, "acpi_cppc"); -- GitLab From f99694cdaf7667c737a6ffa14609a4e880647dcd Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 18 Nov 2016 15:16:06 +0100 Subject: [PATCH 0577/1442] block: Change extern inline to static inline commit 9a05e7541c39680d28ecf91892338e074738d5fd upstream. With compilers which follow the C99 standard (like modern versions of gcc and clang), "extern inline" does the opposite thing from older versions of gcc (emits code for an externally linkable version of the inline function). "static inline" does the intended behavior in all cases instead. Description taken from commit 6d91857d4826 ("staging, rtl8192e, LLVMLinux: Change extern inline to static inline"). This also fixes the following GCC warning when building with CONFIG_PM disabled: ./include/linux/blkdev.h:1143:20: warning: no previous prototype for 'blk_set_runtime_active' [-Wmissing-prototypes] Fixes: d07ab6d11477 ("block: Add blk_set_runtime_active()") Reviewed-by: Mika Westerberg Signed-off-by: Tobias Klauser Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c47c358ba052..f6a816129856 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1057,7 +1057,7 @@ static inline int blk_pre_runtime_suspend(struct request_queue *q) static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} static inline void blk_pre_runtime_resume(struct request_queue *q) {} static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} -extern inline void blk_set_runtime_active(struct request_queue *q) {} +static inline void blk_set_runtime_active(struct request_queue *q) {} #endif /* -- GitLab From f57d8710096d9f95ed4fd5bb80a6dc5865e4ea7c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 10 Nov 2016 11:16:37 -0500 Subject: [PATCH 0578/1442] block: cfq_cpd_alloc() should use @gfp commit ebc4ff661fbe76781c6b16dfb7b754a5d5073f8e upstream. cfq_cpd_alloc() which is the cpd_alloc_fn implementation for cfq was incorrectly hard coding GFP_KERNEL instead of using the mask specified through the @gfp parameter. This currently doesn't cause any actual issues because all current callers specify GFP_KERNEL. Fix it. Signed-off-by: Tejun Heo Reported-by: Dan Carpenter Fixes: e4a9bde9589f ("blkcg: replace blkcg_policy->cpd_size with ->cpd_alloc/free_fn() methods") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/cfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5e24d880306c..3ab6807773ee 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1596,7 +1596,7 @@ static struct blkcg_policy_data *cfq_cpd_alloc(gfp_t gfp) { struct cfq_group_data *cgd; - cgd = kzalloc(sizeof(*cgd), GFP_KERNEL); + cgd = kzalloc(sizeof(*cgd), gfp); if (!cgd) return NULL; return &cgd->cpd; -- GitLab From f53cfb081f000bb07f838515bf090a9d7d9047c6 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 30 Nov 2016 08:19:39 -0500 Subject: [PATCH 0579/1442] ACPI / APEI: Fix NMI notification handling commit a545715d2dae8d071c5b06af947b07ffa846b288 upstream. When removing and adding cpu 0 on a system with GHES NMI the following stack trace is seen when re-adding the cpu: WARNING: CPU: 0 PID: 0 at arch/x86/kernel/apic/apic.c:1349 setup_local_APIC+ Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 nfs fscache coretemp intel_ra CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.0-rc6+ #2 Call Trace: dump_stack+0x63/0x8e __warn+0xd1/0xf0 warn_slowpath_null+0x1d/0x20 setup_local_APIC+0x275/0x370 apic_ap_setup+0xe/0x20 start_secondary+0x48/0x180 set_init_arg+0x55/0x55 early_idt_handler_array+0x120/0x120 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x13d/0x14c During the cpu bringup, wakeup_cpu_via_init_nmi() is called and issues an NMI on CPU 0. The GHES NMI handler, ghes_notify_nmi() runs the ghes_proc_irq_work work queue which ends up setting IRQ_WORK_VECTOR (0xf6). The "faulty" IR line set at arch/x86/kernel/apic/apic.c:1349 is also 0xf6 (specifically APIC IRR for irqs 255 to 224 is 0x400000) which confirms that something has set the IRQ_WORK_VECTOR line prior to the APIC being initialized. Commit 2383844d4850 ("GHES: Elliminate double-loop in the NMI handler") incorrectly modified the behavior such that the handler returns NMI_HANDLED only if an error was processed, and incorrectly runs the ghes work queue for every NMI. This patch modifies the ghes_proc_irq_work() to run as it did prior to 2383844d4850 ("GHES: Elliminate double-loop in the NMI handler") by properly returning NMI_HANDLED and only calling the work queue if NMI_HANDLED has been set. Fixes: 2383844d4850 (GHES: Elliminate double-loop in the NMI handler) Signed-off-by: Prarit Bhargava Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/apei/ghes.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 0d099a24f776..e53bef6cf53c 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -852,6 +852,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) if (ghes_read_estatus(ghes, 1)) { ghes_clear_estatus(ghes); continue; + } else { + ret = NMI_HANDLED; } sev = ghes_severity(ghes->estatus->error_severity); @@ -863,12 +865,11 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) __process_error(ghes); ghes_clear_estatus(ghes); - - ret = NMI_HANDLED; } #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - irq_work_queue(&ghes_proc_irq_work); + if (ret == NMI_HANDLED) + irq_work_queue(&ghes_proc_irq_work); #endif atomic_dec(&ghes_in_nmi); return ret; -- GitLab From aa3ee06a2ae2fe7b7b65fc9a82d52b96f99ab1d6 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 28 Nov 2016 13:53:11 -0800 Subject: [PATCH 0580/1442] powercap/intel_rapl: fix and tidy up error handling commit cb43f81b8489dcb87555e16c17453f0a9fa690f2 upstream. Commit e1399ba20eee ("powercap / RAPL: handle missing MSRs") added contraint_to_pl() function to return index into an array. But it can potentially return -EINVAL if powercap layer sends an out of range constraint ID. This patch adds sanity check. Unnecessary RAPL domain pointer check is removed since it must be initialized before calling rapl_unit_xlate(). Fixes: e1399ba20eee ("powercap / RAPL: handle missing MSRs") Reported-by: Odzioba, Lukasz Reported-by: Koss, Marcin Signed-off-by: Jacob Pan Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/powercap/intel_rapl.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 243b233ff31b..3c71f608b444 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -442,6 +442,7 @@ static int contraint_to_pl(struct rapl_domain *rd, int cid) return i; } } + pr_err("Cannot find matching power limit for constraint %d\n", cid); return -EINVAL; } @@ -457,6 +458,10 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid, get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); id = contraint_to_pl(rd, cid); + if (id < 0) { + ret = id; + goto set_exit; + } rp = rd->rp; @@ -496,6 +501,11 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid, get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); id = contraint_to_pl(rd, cid); + if (id < 0) { + ret = id; + goto get_exit; + } + switch (rd->rpl[id].prim_id) { case PL1_ENABLE: prim = POWER_LIMIT1; @@ -512,6 +522,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid, else *data = val; +get_exit: put_online_cpus(); return ret; @@ -527,6 +538,10 @@ static int set_time_window(struct powercap_zone *power_zone, int cid, get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); id = contraint_to_pl(rd, cid); + if (id < 0) { + ret = id; + goto set_time_exit; + } switch (rd->rpl[id].prim_id) { case PL1_ENABLE: @@ -538,6 +553,8 @@ static int set_time_window(struct powercap_zone *power_zone, int cid, default: ret = -EINVAL; } + +set_time_exit: put_online_cpus(); return ret; } @@ -552,6 +569,10 @@ static int get_time_window(struct powercap_zone *power_zone, int cid, u64 *data) get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); id = contraint_to_pl(rd, cid); + if (id < 0) { + ret = id; + goto get_time_exit; + } switch (rd->rpl[id].prim_id) { case PL1_ENABLE: @@ -566,6 +587,8 @@ static int get_time_window(struct powercap_zone *power_zone, int cid, u64 *data) } if (!ret) *data = val; + +get_time_exit: put_online_cpus(); return ret; @@ -707,7 +730,7 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, case ENERGY_UNIT: scale = ENERGY_UNIT_SCALE; /* per domain unit takes precedence */ - if (rd && rd->domain_energy_unit) + if (rd->domain_energy_unit) units = rd->domain_energy_unit; else units = rp->energy_unit; -- GitLab From 4b07d85374f582fc3aa009deda62fcc26713146a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 17 Sep 2016 00:41:37 +0000 Subject: [PATCH 0581/1442] iw_cxgb4: Fix error return code in c4iw_rdev_open() commit 15f7e3c21b76598bc6e5816d2577ce843b2b963f upstream. Fix to return error code -ENOMEM from the __get_free_page() error handling case instead of 0, as done elsewhere in this function. Fixes: 05eb23893c2c ("cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes") Signed-off-by: Wei Yongjun Acked-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 93e3d270a98a..b99dc9e0ffb2 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -828,8 +828,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) } rdev->status_page = (struct t4_dev_status_page *) __get_free_page(GFP_KERNEL); - if (!rdev->status_page) + if (!rdev->status_page) { + err = -ENOMEM; goto destroy_ocqp_pool; + } rdev->status_page->qp_start = rdev->lldi.vr->qp.start; rdev->status_page->qp_size = rdev->lldi.vr->qp.size; rdev->status_page->cq_start = rdev->lldi.vr->cq.start; -- GitLab From df386a0c2804c0a2282dd95d5530e3aa75f7cd3f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 14 Nov 2016 19:38:31 -0800 Subject: [PATCH 0582/1442] bq24190_charger: Fix PM runtime use for bq24190_battery_set_property commit 075eb5719d53e8bb4a406ad87e1de99319aa50f0 upstream. There's a typo, it should do pm_runtime_get_sync, not put. Fixes: d7bf353fd0aa3 ("bq24190_charger: Add support for TI BQ24190 Battery Charger") Cc: Mark A. Greer Signed-off-by: Tony Lindgren Reviewed-by: Mark Greer Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/bq24190_charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index f5746b9f4e83..e9584330aeed 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -1141,7 +1141,7 @@ static int bq24190_battery_set_property(struct power_supply *psy, dev_dbg(bdi->dev, "prop: %d\n", psp); - pm_runtime_put_sync(bdi->dev); + pm_runtime_get_sync(bdi->dev); switch (psp) { case POWER_SUPPLY_PROP_ONLINE: -- GitLab From cb50d45c3d42633830ca60b34bf46c14941cdde4 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Fri, 4 Nov 2016 13:33:13 -0500 Subject: [PATCH 0583/1442] power: supply: bq27xxx_battery: Fix register map for BQ27510 and BQ27520 commit 3bee9ea1de687925d116670f036599cbed8b66b0 upstream. The BQ27510 and BQ27520 use a slightly different register map than the BQ27500, add a new type enum and add these gauges to it. Fixes: d74534c27775 ("power: bq27xxx_battery: Add support for additional bq27xxx family devices") Based-on-patch-by: Kenneth R. Crudup Signed-off-by: Andrew F. Davis Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/bq27xxx_battery.c | 41 +++++++++++++++++++++- drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-- include/linux/power/bq27xxx_battery.h | 3 +- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 3b0dbc689d72..bccb3f595ff3 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -164,6 +164,25 @@ static u8 bq27xxx_regs[][BQ27XXX_REG_MAX] = { [BQ27XXX_REG_DCAP] = 0x3c, [BQ27XXX_REG_AP] = INVALID_REG_ADDR, }, + [BQ27510] = { + [BQ27XXX_REG_CTRL] = 0x00, + [BQ27XXX_REG_TEMP] = 0x06, + [BQ27XXX_REG_INT_TEMP] = 0x28, + [BQ27XXX_REG_VOLT] = 0x08, + [BQ27XXX_REG_AI] = 0x14, + [BQ27XXX_REG_FLAGS] = 0x0a, + [BQ27XXX_REG_TTE] = 0x16, + [BQ27XXX_REG_TTF] = INVALID_REG_ADDR, + [BQ27XXX_REG_TTES] = 0x1a, + [BQ27XXX_REG_TTECP] = INVALID_REG_ADDR, + [BQ27XXX_REG_NAC] = 0x0c, + [BQ27XXX_REG_FCC] = 0x12, + [BQ27XXX_REG_CYCT] = 0x1e, + [BQ27XXX_REG_AE] = INVALID_REG_ADDR, + [BQ27XXX_REG_SOC] = 0x20, + [BQ27XXX_REG_DCAP] = 0x2e, + [BQ27XXX_REG_AP] = INVALID_REG_ADDR, + }, [BQ27530] = { [BQ27XXX_REG_CTRL] = 0x00, [BQ27XXX_REG_TEMP] = 0x06, @@ -302,6 +321,24 @@ static enum power_supply_property bq27500_battery_props[] = { POWER_SUPPLY_PROP_MANUFACTURER, }; +static enum power_supply_property bq27510_battery_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, + POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CHARGE_FULL, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_HEALTH, + POWER_SUPPLY_PROP_MANUFACTURER, +}; + static enum power_supply_property bq27530_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, @@ -385,6 +422,7 @@ static struct { BQ27XXX_PROP(BQ27000, bq27000_battery_props), BQ27XXX_PROP(BQ27010, bq27010_battery_props), BQ27XXX_PROP(BQ27500, bq27500_battery_props), + BQ27XXX_PROP(BQ27510, bq27510_battery_props), BQ27XXX_PROP(BQ27530, bq27530_battery_props), BQ27XXX_PROP(BQ27541, bq27541_battery_props), BQ27XXX_PROP(BQ27545, bq27545_battery_props), @@ -635,7 +673,8 @@ static int bq27xxx_battery_read_pwr_avg(struct bq27xxx_device_info *di) */ static bool bq27xxx_battery_overtemp(struct bq27xxx_device_info *di, u16 flags) { - if (di->chip == BQ27500 || di->chip == BQ27541 || di->chip == BQ27545) + if (di->chip == BQ27500 || di->chip == BQ27510 || + di->chip == BQ27541 || di->chip == BQ27545) return flags & (BQ27XXX_FLAG_OTC | BQ27XXX_FLAG_OTD); if (di->chip == BQ27530 || di->chip == BQ27421) return flags & BQ27XXX_FLAG_OT; diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 85d4ea2a9c20..5c5c3a6f9923 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -149,8 +149,8 @@ static const struct i2c_device_id bq27xxx_i2c_id_table[] = { { "bq27200", BQ27000 }, { "bq27210", BQ27010 }, { "bq27500", BQ27500 }, - { "bq27510", BQ27500 }, - { "bq27520", BQ27500 }, + { "bq27510", BQ27510 }, + { "bq27520", BQ27510 }, { "bq27530", BQ27530 }, { "bq27531", BQ27530 }, { "bq27541", BQ27541 }, diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index e30deb046156..bed9557b69e7 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -4,7 +4,8 @@ enum bq27xxx_chip { BQ27000 = 1, /* bq27000, bq27200 */ BQ27010, /* bq27010, bq27210 */ - BQ27500, /* bq27500, bq27510, bq27520 */ + BQ27500, /* bq27500 */ + BQ27510, /* bq27510, bq27520 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- GitLab From d9c19f90f3a50eb4ff8d17616492338c0b2f9434 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 28 Sep 2016 00:24:24 -0300 Subject: [PATCH 0584/1442] blk-mq: Always schedule hctx->next_cpu commit c02ebfdddbafa9a6a0f52fbd715e6bfa229af9d3 upstream. Commit 0e87e58bf60e ("blk-mq: improve warning for running a queue on the wrong CPU") attempts to avoid triggering the WARN_ON in __blk_mq_run_hw_queue when the expected CPU is dead. Problem is, in the last batch execution before round robin, blk_mq_hctx_next_cpu can schedule a dead CPU and also update next_cpu to the next alive CPU in the mask, which will trigger the WARN_ON despite the previous workaround. The following patch fixes this scenario by always scheduling the value in hctx->next_cpu. This changes the moment when we round-robin the CPU running the hctx, but it really doesn't matter, since it still executes BLK_MQ_CPU_WORK_BATCH times in a row before switching to another CPU. Fixes: 0e87e58bf60e ("blk-mq: improve warning for running a queue on the wrong CPU") Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index ad459e4e8071..81caceb96c3c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -895,7 +895,7 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) return WORK_CPU_UNBOUND; if (--hctx->next_cpu_batch <= 0) { - int cpu = hctx->next_cpu, next_cpu; + int next_cpu; next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask); if (next_cpu >= nr_cpu_ids) @@ -903,8 +903,6 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) hctx->next_cpu = next_cpu; hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; - - return cpu; } return hctx->next_cpu; -- GitLab From 42eded92c42b7c177ac749e1c44af38acba42d37 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Nov 2016 17:31:30 +0000 Subject: [PATCH 0585/1442] bus: vexpress-config: fix device reference leak commit c090959b9dd8c87703e275079aa4b4a824ba3f8e upstream. Make sure to drop the reference to the parent device taken by class_find_device() after populating the bus. Fixes: 3b9334ac835b ("mfd: vexpress: Convert custom func API to regmap") Acked-by: Sudeep Holla Signed-off-by: Johan Hovold Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/bus/vexpress-config.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/bus/vexpress-config.c b/drivers/bus/vexpress-config.c index 9efdf1de4035..493e7b9fc813 100644 --- a/drivers/bus/vexpress-config.c +++ b/drivers/bus/vexpress-config.c @@ -171,6 +171,7 @@ static int vexpress_config_populate(struct device_node *node) { struct device_node *bridge; struct device *parent; + int ret; bridge = of_parse_phandle(node, "arm,vexpress,config-bridge", 0); if (!bridge) @@ -182,7 +183,11 @@ static int vexpress_config_populate(struct device_node *node) if (WARN_ON(!parent)) return -ENODEV; - return of_platform_populate(node, NULL, NULL, parent); + ret = of_platform_populate(node, NULL, NULL, parent); + + put_device(parent); + + return ret; } static int __init vexpress_config_init(void) -- GitLab From ae4c24b2d58492d0da72b93aca9cd2f3ed48c69e Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Wed, 9 Nov 2016 16:36:33 +1100 Subject: [PATCH 0586/1442] powerpc/mm: Correct process and partition table max size commit 555c16328ae6d75a90e234eac9b51998d68f185b upstream. Version 3.00 of the ISA states that the PATS (partition table size) field of the PTCR (partition table control register) and the PRTS (process table size) field of the partition table entry must both be less than or equal to 24. However the actual size of the partition and process tables is equal to 2 to the power of 12 plus the PATS and PRTS fields, respectively. This means that the max allowable size of each of these tables is 2^36 or 64GB for both. Thus when checking the size shift for each we should be checking for values of greater than 36 instead of the current check for shifts larger than 24 and 23. Fixes: 2bfd65e45e877fb5704730244da67c748d28a1b8 Signed-off-by: Suraj Jitindar Singh Reviewed-by: Balbir Singh Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/pgtable-radix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 688b54517655..ebb7f46f0532 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -159,7 +159,7 @@ static void __init radix_init_pgtable(void) * Allocate Partition table and process table for the * host. */ - BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 23), "Process table size too large."); + BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large."); process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. @@ -181,7 +181,7 @@ static void __init radix_init_partition_table(void) rts_field = radix__get_tree_size(); - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); + BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR); -- GitLab From 25bba7152f2afc3e9517a53f3aa934fbaa68cd68 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 16:26:01 +0100 Subject: [PATCH 0587/1442] powerpc/ibmebus: Fix further device reference leaks commit 815a7141c4d1b11610dccb7fcbb38633759824f2 upstream. Make sure to drop any reference taken by bus_find_device() when creating devices during init and driver registration. Fixes: 55347cc9962f ("[POWERPC] ibmebus: Add device creation and bus probing based on of_device") Signed-off-by: Johan Hovold Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/ibmebus.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 6ca9a2ffaac7..0379ddf29220 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -180,6 +180,7 @@ static int ibmebus_create_device(struct device_node *dn) static int ibmebus_create_devices(const struct of_device_id *matches) { struct device_node *root, *child; + struct device *dev; int ret = 0; root = of_find_node_by_path("/"); @@ -188,9 +189,12 @@ static int ibmebus_create_devices(const struct of_device_id *matches) if (!of_match_node(matches, child)) continue; - if (bus_find_device(&ibmebus_bus_type, NULL, child, - ibmebus_match_node)) + dev = bus_find_device(&ibmebus_bus_type, NULL, child, + ibmebus_match_node); + if (dev) { + put_device(dev); continue; + } ret = ibmebus_create_device(child); if (ret) { -- GitLab From dcd15c20c2088e7d0a910f74c706dd7870dc7f62 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 16:26:00 +0100 Subject: [PATCH 0588/1442] powerpc/ibmebus: Fix device reference leaks in sysfs interface commit fe0f3168169f7c34c29b0cf0c489f126a7f29643 upstream. Make sure to drop any reference taken by bus_find_device() in the sysfs callbacks that are used to create and destroy devices based on device-tree entries. Fixes: 6bccf755ff53 ("[POWERPC] ibmebus: dynamic addition/removal of adapters, some code cleanup") Signed-off-by: Johan Hovold Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/ibmebus.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 0379ddf29220..35f5244782d9 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -266,6 +266,7 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, const char *buf, size_t count) { struct device_node *dn = NULL; + struct device *dev; char *path; ssize_t rc = 0; @@ -273,8 +274,10 @@ static ssize_t ibmebus_store_probe(struct bus_type *bus, if (!path) return -ENOMEM; - if (bus_find_device(&ibmebus_bus_type, NULL, path, - ibmebus_match_path)) { + dev = bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path); + if (dev) { + put_device(dev); printk(KERN_WARNING "%s: %s has already been probed\n", __func__, path); rc = -EEXIST; @@ -311,6 +314,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, ibmebus_match_path))) { of_device_unregister(to_platform_device(dev)); + put_device(dev); kfree(path); return count; -- GitLab From 3083593700ddf653f765031189805fef56959281 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 16 Nov 2016 12:12:26 +1100 Subject: [PATCH 0589/1442] powerpc/powernv: Don't warn on PE init if unfreeze is unsupported commit d4791db527bf397c84c9956c3ece9692ed5322ac upstream. Whenever a PE is initialised in powernv, opal_pci_eeh_freeze_clear() is called. This is to remove any existing freeze, and has no negative side effects if the PE is already in an unfrozen state. On PHB backends that don't support this operation and return OPAL_UNSUPPORTED, this creates a scary and misleading warning message. Skip the warning message on init if OPAL_UNSUPPORTED is returned. As far as I'm aware, this currently only affects NPUs. Fixes: 313483d ("powerpc/powernv: Unfreeze PE on allocation") Signed-off-by: Russell Currey Acked-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d4b33dd2d9e7..dcdfee0cd4f2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -145,7 +145,7 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) */ rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (rc != OPAL_SUCCESS) + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) pr_warn("%s: Error %lld unfreezing PHB#%d-PE#%d\n", __func__, rc, phb->hose->global_number, pe_no); -- GitLab From 736aec92c4e95a993a6e936ba3a90498975f5d62 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 8 Nov 2016 13:44:39 +0800 Subject: [PATCH 0590/1442] arm64: hugetlb: fix the wrong address for several functions commit 0c2f0afe3582c58efeef93bc57bc07d502132618 upstream. The libhugetlbfs meets several failures since the following functions do not use the correct address: huge_ptep_get_and_clear() huge_ptep_set_access_flags() huge_ptep_set_wrprotect() huge_ptep_clear_flush() This patch fixes the wrong address for them. Signed-off-by: Huang Shijie Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/hugetlbpage.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2e49bd252fe7..52f2c8fd1228 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -212,7 +212,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); /* save the 1st pte to return */ pte = ptep_get_and_clear(mm, addr, cpte); - for (i = 1; i < ncontig; ++i) { + for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) { /* * If HW_AFDBM is enabled, then the HW could * turn on the dirty bit for any of the page @@ -250,7 +250,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, pfn = pte_pfn(*cpte); ncontig = find_num_contig(vma->vm_mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) { + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) { changed = ptep_set_access_flags(vma, addr, cpte, pfn_pte(pfn, hugeprot), @@ -273,7 +273,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, cpte = huge_pte_offset(mm, addr); ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) ptep_set_wrprotect(mm, addr, cpte); } else { ptep_set_wrprotect(mm, addr, ptep); @@ -291,7 +291,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, cpte = huge_pte_offset(vma->vm_mm, addr); ncontig = find_num_contig(vma->vm_mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) ptep_clear_flush(vma, addr, cpte); } else { ptep_clear_flush(vma, addr, ptep); -- GitLab From 2021e55d719acfebdabbb4d6d0104465f76c9158 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 8 Nov 2016 13:44:38 +0800 Subject: [PATCH 0591/1442] arm64: hugetlb: remove the wrong pmd check in find_num_contig() commit 20156ce2365d61beaa6f5a78a7a789044e0e7acc upstream. The find_num_contig() will return 1 when the pmd is not present. It will cause a kernel dead loop in the following scenaro: 1.) pmd entry is not present. 2.) the page fault occurs: ... hugetlb_fault() --> hugetlb_no_page() --> set_huge_pte_at() 3.) set_huge_pte_at() will only set the first PMD entry, since the find_num_contig just return 1 in this case. So the PMD entries are all empty except the first one. 4.) when kernel accesses the address mapped by the second PMD entry, a new page fault occurs: ... hugetlb_fault() --> huge_ptep_set_access_flags() The second PMD entry is still empty now. 5.) When the kernel returns, the access will cause a page fault again. The kernel will run like the "4)" above. We will see a dead loop since here. The dead loop is caught in the 32M hugetlb page (2M PMD + Contiguous bit). This patch removes wrong pmd check, and fixes this dead loop. This patch also removes the redundant checks for PGD/PUD in the find_num_contig(). Acked-by: Steve Capper Signed-off-by: Huang Shijie Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/hugetlbpage.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 52f2c8fd1228..fd96ba77389c 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -51,20 +51,8 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, *pgsize = PAGE_SIZE; if (!pte_cont(pte)) return 1; - if (!pgd_present(*pgd)) { - VM_BUG_ON(!pgd_present(*pgd)); - return 1; - } pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) { - VM_BUG_ON(!pud_present(*pud)); - return 1; - } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - VM_BUG_ON(!pmd_present(*pmd)); - return 1; - } if ((pte_t *)pmd == ptep) { *pgsize = PMD_SIZE; return CONT_PMDS; -- GitLab From b90a617fef04ae2e2394fead9eb8de5fcf8c7fdc Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 11 Jan 2017 14:02:00 +0800 Subject: [PATCH 0592/1442] arm64: hugetlb: fix the wrong return value for huge_ptep_set_access_flags commit 69d012345a1a32d3f03957f14d972efccc106a98 upstream. In current code, the @changed always returns the last one's status for the huge page with the contiguous bit set. This is really not what we want. Even one of the PTEs is changed, we should tell it to the caller. This patch fixes this issue. Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit") Signed-off-by: Huang Shijie Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index fd96ba77389c..45bec627bae3 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -239,7 +239,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, cpte, *cpte, &pgsize); for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) { - changed = ptep_set_access_flags(vma, addr, cpte, + changed |= ptep_set_access_flags(vma, addr, cpte, pfn_pte(pfn, hugeprot), dirty); -- GitLab From 4e764538512ebc82f13893709ddc0182b5c4af5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Sat, 12 Nov 2016 17:04:24 +0100 Subject: [PATCH 0593/1442] pinctrl: sh-pfc: Do not unconditionally support PIN_CONFIG_BIAS_DISABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5d7400c4acbf7fe633a976a89ee845f7333de3e4 upstream. Always stating PIN_CONFIG_BIAS_DISABLE is supported gives untrue output when examining /sys/kernel/debug/pinctrl/e6060000.pfc/pinconf-pins if the operation get_bias() is implemented but the pin is not handled by the get_bias() implementation. In that case the output will state that "input bias disabled" indicating that this pin has bias control support. Make support for PIN_CONFIG_BIAS_DISABLE depend on that the pin either supports SH_PFC_PIN_CFG_PULL_UP or SH_PFC_PIN_CFG_PULL_DOWN. This also solves the issue where SoC specific implementations print error messages if their particular implementation of {set,get}_bias() is called with a pin it does not know about. Signed-off-by: Niklas Söderlund Acked-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/sh-pfc/pinctrl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index c5772584594c..fcacfa73ef6e 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -570,7 +570,8 @@ static bool sh_pfc_pinconf_validate(struct sh_pfc *pfc, unsigned int _pin, switch (param) { case PIN_CONFIG_BIAS_DISABLE: - return true; + return pin->configs & + (SH_PFC_PIN_CFG_PULL_UP | SH_PFC_PIN_CFG_PULL_DOWN); case PIN_CONFIG_BIAS_PULL_UP: return pin->configs & SH_PFC_PIN_CFG_PULL_UP; -- GitLab From 73f1dd691f22d58c9bfb970cb8b5a3b25fb4f3c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Tue, 25 Sep 2012 17:37:14 -0700 Subject: [PATCH 0594/1442] ANDROID: staging: lowmemorykiller: Add config option to support oom_adj values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversion to use oom_score_adj instead of the deprecated oom_adj values breaks existing user-space code. Add a config option to convert oom_adj values written to oom_score_adj values if they appear to be valid oom_adj values. Change-Id: I68308125059b802ee2991feefb07e9703bc48549 Signed-off-by: Arve Hjønnevåg --- drivers/staging/android/Kconfig | 9 +++ drivers/staging/android/lowmemorykiller.c | 85 +++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 6c00d6f765c6..e41327aad492 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -24,6 +24,15 @@ config ANDROID_LOW_MEMORY_KILLER scripts (/init.rc), and it defines priority values with minimum free memory size for each priority. +config ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES + bool "Android Low Memory Killer: detect oom_adj values" + depends on ANDROID_LOW_MEMORY_KILLER + default y + ---help--- + Detect oom_adj values written to + /sys/module/lowmemorykiller/parameters/adj and convert them + to oom_score_adj values. + source "drivers/staging/android/ion/Kconfig" endif # if ANDROID diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index ec3b66561412..9f577c0e1617 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -200,12 +200,97 @@ static int __init lowmem_init(void) } device_initcall(lowmem_init); +#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES +static short lowmem_oom_adj_to_oom_score_adj(short oom_adj) +{ + if (oom_adj == OOM_ADJUST_MAX) + return OOM_SCORE_ADJ_MAX; + else + return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; +} + +static void lowmem_autodetect_oom_adj_values(void) +{ + int i; + short oom_adj; + short oom_score_adj; + int array_size = ARRAY_SIZE(lowmem_adj); + + if (lowmem_adj_size < array_size) + array_size = lowmem_adj_size; + + if (array_size <= 0) + return; + + oom_adj = lowmem_adj[array_size - 1]; + if (oom_adj > OOM_ADJUST_MAX) + return; + + oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj); + if (oom_score_adj <= OOM_ADJUST_MAX) + return; + + lowmem_print(1, "lowmem_shrink: convert oom_adj to oom_score_adj:\n"); + for (i = 0; i < array_size; i++) { + oom_adj = lowmem_adj[i]; + oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj); + lowmem_adj[i] = oom_score_adj; + lowmem_print(1, "oom_adj %d => oom_score_adj %d\n", + oom_adj, oom_score_adj); + } +} + +static int lowmem_adj_array_set(const char *val, const struct kernel_param *kp) +{ + int ret; + + ret = param_array_ops.set(val, kp); + + /* HACK: Autodetect oom_adj values in lowmem_adj array */ + lowmem_autodetect_oom_adj_values(); + + return ret; +} + +static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp) +{ + return param_array_ops.get(buffer, kp); +} + +static void lowmem_adj_array_free(void *arg) +{ + param_array_ops.free(arg); +} + +static struct kernel_param_ops lowmem_adj_array_ops = { + .set = lowmem_adj_array_set, + .get = lowmem_adj_array_get, + .free = lowmem_adj_array_free, +}; + +static const struct kparam_array __param_arr_adj = { + .max = ARRAY_SIZE(lowmem_adj), + .num = &lowmem_adj_size, + .ops = ¶m_ops_short, + .elemsize = sizeof(lowmem_adj[0]), + .elem = lowmem_adj, +}; +#endif + /* * not really modular, but the easiest way to keep compat with existing * bootargs behaviour is to continue using module_param here. */ module_param_named(cost, lowmem_shrinker.seeks, int, 0644); +#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES +__module_param_call(MODULE_PARAM_PREFIX, adj, + &lowmem_adj_array_ops, + .arr = &__param_arr_adj, + 0644, -1); +__MODULE_PARM_TYPE(adj, "array of short"); +#else module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size, 0644); +#endif module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, 0644); module_param_named(debug_level, lowmem_debug_level, uint, 0644); -- GitLab From cbd47bd62399911c7e95ce82314c660e44a05e61 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 26 May 2015 11:28:47 +0200 Subject: [PATCH 0595/1442] ANDROID: lowmemorykiller: trace kill events. Allows for capturing lmk kill events and their rationale. Change-Id: Ibe215db5bb9806fc550c72c0b9832c85cbd56bf6 Signed-off-by: Martijn Coenen --- drivers/staging/android/lowmemorykiller.c | 12 ++++-- .../staging/android/trace/lowmemorykiller.h | 41 +++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 drivers/staging/android/trace/lowmemorykiller.h diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 9f577c0e1617..9f3766b4fd36 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -43,6 +43,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace/lowmemorykiller.h" + static u32 lowmem_debug_level = 1; static short lowmem_adj[6] = { 0, @@ -165,6 +168,10 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) if (selected->mm) task_set_lmk_waiting(selected); task_unlock(selected); + long cache_size = other_file * (long)(PAGE_SIZE / 1024); + long cache_limit = minfree * (long)(PAGE_SIZE / 1024); + long free = other_free * (long)(PAGE_SIZE / 1024); + trace_lowmemory_kill(selected, cache_size, cache_limit, free); lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" " to free %ldkB on behalf of '%s' (%d) because\n" " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" @@ -173,10 +180,9 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) selected_oom_score_adj, selected_tasksize * (long)(PAGE_SIZE / 1024), current->comm, current->pid, - other_file * (long)(PAGE_SIZE / 1024), - minfree * (long)(PAGE_SIZE / 1024), + cache_size, cache_limit, min_score_adj, - other_free * (long)(PAGE_SIZE / 1024)); + free); lowmem_deathpending_timeout = jiffies + HZ; rem += selected_tasksize; } diff --git a/drivers/staging/android/trace/lowmemorykiller.h b/drivers/staging/android/trace/lowmemorykiller.h new file mode 100644 index 000000000000..f43d3fae75ee --- /dev/null +++ b/drivers/staging/android/trace/lowmemorykiller.h @@ -0,0 +1,41 @@ +#undef TRACE_SYSTEM +#define TRACE_INCLUDE_PATH ../../drivers/staging/android/trace +#define TRACE_SYSTEM lowmemorykiller + +#if !defined(_TRACE_LOWMEMORYKILLER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOWMEMORYKILLER_H + +#include + +TRACE_EVENT(lowmemory_kill, + TP_PROTO(struct task_struct *killed_task, long cache_size, \ + long cache_limit, long free), + + TP_ARGS(killed_task, cache_size, cache_limit, free), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(long, pagecache_size) + __field(long, pagecache_limit) + __field(long, free) + ), + + TP_fast_assign( + memcpy(__entry->comm, killed_task->comm, TASK_COMM_LEN); + __entry->pid = killed_task->pid; + __entry->pagecache_size = cache_size; + __entry->pagecache_limit = cache_limit; + __entry->free = free; + ), + + TP_printk("%s (%d), page cache %ldkB (limit %ldkB), free %ldKb", + __entry->comm, __entry->pid, __entry->pagecache_size, + __entry->pagecache_limit, __entry->free) +); + + +#endif /* if !defined(_TRACE_LOWMEMORYKILLER_H) || defined(TRACE_HEADER_MULTI_READ) */ + +/* This part must be outside protection */ +#include -- GitLab From 092be1d02aca34d5347b90375956ddca076df605 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 30 Oct 2015 01:16:29 +0530 Subject: [PATCH 0596/1442] ANDROID: lowmemorykiller: use module_param_cb instead of __module_param_call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use module_param_cb helper routine instead of __module_param_call otherwise we run into following build error: CC drivers/staging/android/lowmemorykiller.o drivers/staging/android/lowmemorykiller.c:293:28: error: macro "__module_param_call" requires 7 arguments, but only 6 given S_IRUGO | S_IWUSR, -1); ^ drivers/staging/android/lowmemorykiller.c:290:1: warning: data definition has no type or storage class [enabled by default] __module_param_call(MODULE_PARAM_PREFIX, adj, ^ drivers/staging/android/lowmemorykiller.c:290:1: error: type defaults to ‘int’ in declaration of ‘__module_param_call’ [-Werror=implicit-int] drivers/staging/android/lowmemorykiller.c:273:32: warning: ‘lowmem_adj_array_ops’ defined but not used [-Wunused-variable] static struct kernel_param_ops lowmem_adj_array_ops = { ^ cc1: some warnings being treated as errors make[3]: *** [drivers/staging/android/lowmemorykiller.o] Error 1 Change-Id: I9597d30626f71c39710ac06207e52868f602ee73 Signed-off-by: Amit Pundir --- drivers/staging/android/lowmemorykiller.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 9f3766b4fd36..eaae2f7921e2 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -289,10 +289,9 @@ static const struct kparam_array __param_arr_adj = { */ module_param_named(cost, lowmem_shrinker.seeks, int, 0644); #ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES -__module_param_call(MODULE_PARAM_PREFIX, adj, - &lowmem_adj_array_ops, - .arr = &__param_arr_adj, - 0644, -1); +module_param_cb(adj, &lowmem_adj_array_ops, + .arr = &__param_arr_adj, + 0644); __MODULE_PARM_TYPE(adj, "array of short"); #else module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size, 0644); -- GitLab From ca984139d6bf34e2b3021df602f8d663551723b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Wed, 15 Oct 2008 18:23:47 -0700 Subject: [PATCH 0597/1442] ANDROID: Input: Generic GPIO Input device. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supports keyboard matrixces, direct inputs, direct outputs and axes connected to gpios. Change-Id: I5e921e6e3a1cc169316ee3b665f4cc21b5735114 Signed-off-by: Arve Hjønnevåg Signed-off-by: Nick Pelly --- drivers/input/misc/Kconfig | 5 + drivers/input/misc/Makefile | 1 + drivers/input/misc/gpio_axis.c | 192 ++++++++++++++ drivers/input/misc/gpio_event.c | 249 +++++++++++++++++ drivers/input/misc/gpio_input.c | 376 ++++++++++++++++++++++++++ drivers/input/misc/gpio_matrix.c | 441 +++++++++++++++++++++++++++++++ drivers/input/misc/gpio_output.c | 97 +++++++ include/linux/gpio_event.h | 170 ++++++++++++ 8 files changed, 1531 insertions(+) create mode 100644 drivers/input/misc/gpio_axis.c create mode 100644 drivers/input/misc/gpio_event.c create mode 100644 drivers/input/misc/gpio_input.c create mode 100644 drivers/input/misc/gpio_matrix.c create mode 100644 drivers/input/misc/gpio_output.c create mode 100644 include/linux/gpio_event.h diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 7ffb614ce566..ea1b5e441a96 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -535,6 +535,11 @@ config INPUT_SGI_BTNS To compile this driver as a module, choose M here: the module will be called sgi_btns. +config INPUT_GPIO + tristate "GPIO driver support" + help + Say Y here if you want to support gpio based keys, wheels etc... + config HP_SDC_RTC tristate "HP SDC Real Time Clock" depends on (GSC || HP300) && SERIO diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 0b6d025f0487..13bf7dbba627 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_INPUT_GP2A) += gp2ap002a00f.o obj-$(CONFIG_INPUT_GPIO_BEEPER) += gpio-beeper.o obj-$(CONFIG_INPUT_GPIO_TILT_POLLED) += gpio_tilt_polled.o obj-$(CONFIG_INPUT_GPIO_DECODER) += gpio_decoder.o +obj-$(CONFIG_INPUT_GPIO) += gpio_event.o gpio_matrix.o gpio_input.o gpio_output.o gpio_axis.o obj-$(CONFIG_INPUT_HISI_POWERKEY) += hisi_powerkey.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o obj-$(CONFIG_INPUT_IMS_PCU) += ims-pcu.o diff --git a/drivers/input/misc/gpio_axis.c b/drivers/input/misc/gpio_axis.c new file mode 100644 index 000000000000..0acf4a576f53 --- /dev/null +++ b/drivers/input/misc/gpio_axis.c @@ -0,0 +1,192 @@ +/* drivers/input/misc/gpio_axis.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include + +struct gpio_axis_state { + struct gpio_event_input_devs *input_devs; + struct gpio_event_axis_info *info; + uint32_t pos; +}; + +uint16_t gpio_axis_4bit_gray_map_table[] = { + [0x0] = 0x0, [0x1] = 0x1, /* 0000 0001 */ + [0x3] = 0x2, [0x2] = 0x3, /* 0011 0010 */ + [0x6] = 0x4, [0x7] = 0x5, /* 0110 0111 */ + [0x5] = 0x6, [0x4] = 0x7, /* 0101 0100 */ + [0xc] = 0x8, [0xd] = 0x9, /* 1100 1101 */ + [0xf] = 0xa, [0xe] = 0xb, /* 1111 1110 */ + [0xa] = 0xc, [0xb] = 0xd, /* 1010 1011 */ + [0x9] = 0xe, [0x8] = 0xf, /* 1001 1000 */ +}; +uint16_t gpio_axis_4bit_gray_map(struct gpio_event_axis_info *info, uint16_t in) +{ + return gpio_axis_4bit_gray_map_table[in]; +} + +uint16_t gpio_axis_5bit_singletrack_map_table[] = { + [0x10] = 0x00, [0x14] = 0x01, [0x1c] = 0x02, /* 10000 10100 11100 */ + [0x1e] = 0x03, [0x1a] = 0x04, [0x18] = 0x05, /* 11110 11010 11000 */ + [0x08] = 0x06, [0x0a] = 0x07, [0x0e] = 0x08, /* 01000 01010 01110 */ + [0x0f] = 0x09, [0x0d] = 0x0a, [0x0c] = 0x0b, /* 01111 01101 01100 */ + [0x04] = 0x0c, [0x05] = 0x0d, [0x07] = 0x0e, /* 00100 00101 00111 */ + [0x17] = 0x0f, [0x16] = 0x10, [0x06] = 0x11, /* 10111 10110 00110 */ + [0x02] = 0x12, [0x12] = 0x13, [0x13] = 0x14, /* 00010 10010 10011 */ + [0x1b] = 0x15, [0x0b] = 0x16, [0x03] = 0x17, /* 11011 01011 00011 */ + [0x01] = 0x18, [0x09] = 0x19, [0x19] = 0x1a, /* 00001 01001 11001 */ + [0x1d] = 0x1b, [0x15] = 0x1c, [0x11] = 0x1d, /* 11101 10101 10001 */ +}; +uint16_t gpio_axis_5bit_singletrack_map( + struct gpio_event_axis_info *info, uint16_t in) +{ + return gpio_axis_5bit_singletrack_map_table[in]; +} + +static void gpio_event_update_axis(struct gpio_axis_state *as, int report) +{ + struct gpio_event_axis_info *ai = as->info; + int i; + int change; + uint16_t state = 0; + uint16_t pos; + uint16_t old_pos = as->pos; + for (i = ai->count - 1; i >= 0; i--) + state = (state << 1) | gpio_get_value(ai->gpio[i]); + pos = ai->map(ai, state); + if (ai->flags & GPIOEAF_PRINT_RAW) + pr_info("axis %d-%d raw %x, pos %d -> %d\n", + ai->type, ai->code, state, old_pos, pos); + if (report && pos != old_pos) { + if (ai->type == EV_REL) { + change = (ai->decoded_size + pos - old_pos) % + ai->decoded_size; + if (change > ai->decoded_size / 2) + change -= ai->decoded_size; + if (change == ai->decoded_size / 2) { + if (ai->flags & GPIOEAF_PRINT_EVENT) + pr_info("axis %d-%d unknown direction, " + "pos %d -> %d\n", ai->type, + ai->code, old_pos, pos); + change = 0; /* no closest direction */ + } + if (ai->flags & GPIOEAF_PRINT_EVENT) + pr_info("axis %d-%d change %d\n", + ai->type, ai->code, change); + input_report_rel(as->input_devs->dev[ai->dev], + ai->code, change); + } else { + if (ai->flags & GPIOEAF_PRINT_EVENT) + pr_info("axis %d-%d now %d\n", + ai->type, ai->code, pos); + input_event(as->input_devs->dev[ai->dev], + ai->type, ai->code, pos); + } + input_sync(as->input_devs->dev[ai->dev]); + } + as->pos = pos; +} + +static irqreturn_t gpio_axis_irq_handler(int irq, void *dev_id) +{ + struct gpio_axis_state *as = dev_id; + gpio_event_update_axis(as, 1); + return IRQ_HANDLED; +} + +int gpio_event_axis_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func) +{ + int ret; + int i; + int irq; + struct gpio_event_axis_info *ai; + struct gpio_axis_state *as; + + ai = container_of(info, struct gpio_event_axis_info, info); + if (func == GPIO_EVENT_FUNC_SUSPEND) { + for (i = 0; i < ai->count; i++) + disable_irq(gpio_to_irq(ai->gpio[i])); + return 0; + } + if (func == GPIO_EVENT_FUNC_RESUME) { + for (i = 0; i < ai->count; i++) + enable_irq(gpio_to_irq(ai->gpio[i])); + return 0; + } + + if (func == GPIO_EVENT_FUNC_INIT) { + *data = as = kmalloc(sizeof(*as), GFP_KERNEL); + if (as == NULL) { + ret = -ENOMEM; + goto err_alloc_axis_state_failed; + } + as->input_devs = input_devs; + as->info = ai; + if (ai->dev >= input_devs->count) { + pr_err("gpio_event_axis: bad device index %d >= %d " + "for %d:%d\n", ai->dev, input_devs->count, + ai->type, ai->code); + ret = -EINVAL; + goto err_bad_device_index; + } + + input_set_capability(input_devs->dev[ai->dev], + ai->type, ai->code); + if (ai->type == EV_ABS) { + input_set_abs_params(input_devs->dev[ai->dev], ai->code, + 0, ai->decoded_size - 1, 0, 0); + } + for (i = 0; i < ai->count; i++) { + ret = gpio_request(ai->gpio[i], "gpio_event_axis"); + if (ret < 0) + goto err_request_gpio_failed; + ret = gpio_direction_input(ai->gpio[i]); + if (ret < 0) + goto err_gpio_direction_input_failed; + ret = irq = gpio_to_irq(ai->gpio[i]); + if (ret < 0) + goto err_get_irq_num_failed; + ret = request_irq(irq, gpio_axis_irq_handler, + IRQF_TRIGGER_RISING | + IRQF_TRIGGER_FALLING, + "gpio_event_axis", as); + if (ret < 0) + goto err_request_irq_failed; + } + gpio_event_update_axis(as, 0); + return 0; + } + + ret = 0; + as = *data; + for (i = ai->count - 1; i >= 0; i--) { + free_irq(gpio_to_irq(ai->gpio[i]), as); +err_request_irq_failed: +err_get_irq_num_failed: +err_gpio_direction_input_failed: + gpio_free(ai->gpio[i]); +err_request_gpio_failed: + ; + } +err_bad_device_index: + kfree(as); + *data = NULL; +err_alloc_axis_state_failed: + return ret; +} diff --git a/drivers/input/misc/gpio_event.c b/drivers/input/misc/gpio_event.c new file mode 100644 index 000000000000..d4e5b4dfe19f --- /dev/null +++ b/drivers/input/misc/gpio_event.c @@ -0,0 +1,249 @@ +/* drivers/input/misc/gpio_event.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct gpio_event { + struct gpio_event_input_devs *input_devs; + const struct gpio_event_platform_data *info; + struct early_suspend early_suspend; + void *state[0]; +}; + +static int gpio_input_event( + struct input_dev *dev, unsigned int type, unsigned int code, int value) +{ + int i; + int devnr; + int ret = 0; + int tmp_ret; + struct gpio_event_info **ii; + struct gpio_event *ip = input_get_drvdata(dev); + + for (devnr = 0; devnr < ip->input_devs->count; devnr++) + if (ip->input_devs->dev[devnr] == dev) + break; + if (devnr == ip->input_devs->count) { + pr_err("gpio_input_event: unknown device %p\n", dev); + return -EIO; + } + + for (i = 0, ii = ip->info->info; i < ip->info->info_count; i++, ii++) { + if ((*ii)->event) { + tmp_ret = (*ii)->event(ip->input_devs, *ii, + &ip->state[i], + devnr, type, code, value); + if (tmp_ret) + ret = tmp_ret; + } + } + return ret; +} + +static int gpio_event_call_all_func(struct gpio_event *ip, int func) +{ + int i; + int ret; + struct gpio_event_info **ii; + + if (func == GPIO_EVENT_FUNC_INIT || func == GPIO_EVENT_FUNC_RESUME) { + ii = ip->info->info; + for (i = 0; i < ip->info->info_count; i++, ii++) { + if ((*ii)->func == NULL) { + ret = -ENODEV; + pr_err("gpio_event_probe: Incomplete pdata, " + "no function\n"); + goto err_no_func; + } + if (func == GPIO_EVENT_FUNC_RESUME && (*ii)->no_suspend) + continue; + ret = (*ii)->func(ip->input_devs, *ii, &ip->state[i], + func); + if (ret) { + pr_err("gpio_event_probe: function failed\n"); + goto err_func_failed; + } + } + return 0; + } + + ret = 0; + i = ip->info->info_count; + ii = ip->info->info + i; + while (i > 0) { + i--; + ii--; + if ((func & ~1) == GPIO_EVENT_FUNC_SUSPEND && (*ii)->no_suspend) + continue; + (*ii)->func(ip->input_devs, *ii, &ip->state[i], func & ~1); +err_func_failed: +err_no_func: + ; + } + return ret; +} + +#ifdef CONFIG_HAS_EARLYSUSPEND +void gpio_event_suspend(struct early_suspend *h) +{ + struct gpio_event *ip; + ip = container_of(h, struct gpio_event, early_suspend); + gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_SUSPEND); + ip->info->power(ip->info, 0); +} + +void gpio_event_resume(struct early_suspend *h) +{ + struct gpio_event *ip; + ip = container_of(h, struct gpio_event, early_suspend); + ip->info->power(ip->info, 1); + gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_RESUME); +} +#endif + +static int gpio_event_probe(struct platform_device *pdev) +{ + int err; + struct gpio_event *ip; + struct gpio_event_platform_data *event_info; + int dev_count = 1; + int i; + int registered = 0; + + event_info = pdev->dev.platform_data; + if (event_info == NULL) { + pr_err("gpio_event_probe: No pdata\n"); + return -ENODEV; + } + if ((!event_info->name && !event_info->names[0]) || + !event_info->info || !event_info->info_count) { + pr_err("gpio_event_probe: Incomplete pdata\n"); + return -ENODEV; + } + if (!event_info->name) + while (event_info->names[dev_count]) + dev_count++; + ip = kzalloc(sizeof(*ip) + + sizeof(ip->state[0]) * event_info->info_count + + sizeof(*ip->input_devs) + + sizeof(ip->input_devs->dev[0]) * dev_count, GFP_KERNEL); + if (ip == NULL) { + err = -ENOMEM; + pr_err("gpio_event_probe: Failed to allocate private data\n"); + goto err_kp_alloc_failed; + } + ip->input_devs = (void*)&ip->state[event_info->info_count]; + platform_set_drvdata(pdev, ip); + + for (i = 0; i < dev_count; i++) { + struct input_dev *input_dev = input_allocate_device(); + if (input_dev == NULL) { + err = -ENOMEM; + pr_err("gpio_event_probe: " + "Failed to allocate input device\n"); + goto err_input_dev_alloc_failed; + } + input_set_drvdata(input_dev, ip); + input_dev->name = event_info->name ? + event_info->name : event_info->names[i]; + input_dev->event = gpio_input_event; + ip->input_devs->dev[i] = input_dev; + } + ip->input_devs->count = dev_count; + ip->info = event_info; + if (event_info->power) { +#ifdef CONFIG_HAS_EARLYSUSPEND + ip->early_suspend.level = EARLY_SUSPEND_LEVEL_BLANK_SCREEN + 1; + ip->early_suspend.suspend = gpio_event_suspend; + ip->early_suspend.resume = gpio_event_resume; + register_early_suspend(&ip->early_suspend); +#endif + ip->info->power(ip->info, 1); + } + + err = gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_INIT); + if (err) + goto err_call_all_func_failed; + + for (i = 0; i < dev_count; i++) { + err = input_register_device(ip->input_devs->dev[i]); + if (err) { + pr_err("gpio_event_probe: Unable to register %s " + "input device\n", ip->input_devs->dev[i]->name); + goto err_input_register_device_failed; + } + registered++; + } + + return 0; + +err_input_register_device_failed: + gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT); +err_call_all_func_failed: + if (event_info->power) { +#ifdef CONFIG_HAS_EARLYSUSPEND + unregister_early_suspend(&ip->early_suspend); +#endif + ip->info->power(ip->info, 0); + } + for (i = 0; i < registered; i++) + input_unregister_device(ip->input_devs->dev[i]); + for (i = dev_count - 1; i >= registered; i--) { + input_free_device(ip->input_devs->dev[i]); +err_input_dev_alloc_failed: + ; + } + kfree(ip); +err_kp_alloc_failed: + return err; +} + +static int gpio_event_remove(struct platform_device *pdev) +{ + struct gpio_event *ip = platform_get_drvdata(pdev); + int i; + + gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT); + if (ip->info->power) { +#ifdef CONFIG_HAS_EARLYSUSPEND + unregister_early_suspend(&ip->early_suspend); +#endif + ip->info->power(ip->info, 0); + } + for (i = 0; i < ip->input_devs->count; i++) + input_unregister_device(ip->input_devs->dev[i]); + kfree(ip); + return 0; +} + +static struct platform_driver gpio_event_driver = { + .probe = gpio_event_probe, + .remove = gpio_event_remove, + .driver = { + .name = GPIO_EVENT_DEV_NAME, + }, +}; + +module_platform_driver(gpio_event_driver); + +MODULE_DESCRIPTION("GPIO Event Driver"); +MODULE_LICENSE("GPL"); + diff --git a/drivers/input/misc/gpio_input.c b/drivers/input/misc/gpio_input.c new file mode 100644 index 000000000000..6a0c31510968 --- /dev/null +++ b/drivers/input/misc/gpio_input.c @@ -0,0 +1,376 @@ +/* drivers/input/misc/gpio_input.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + DEBOUNCE_UNSTABLE = BIT(0), /* Got irq, while debouncing */ + DEBOUNCE_PRESSED = BIT(1), + DEBOUNCE_NOTPRESSED = BIT(2), + DEBOUNCE_WAIT_IRQ = BIT(3), /* Stable irq state */ + DEBOUNCE_POLL = BIT(4), /* Stable polling state */ + + DEBOUNCE_UNKNOWN = + DEBOUNCE_PRESSED | DEBOUNCE_NOTPRESSED, +}; + +struct gpio_key_state { + struct gpio_input_state *ds; + uint8_t debounce; +}; + +struct gpio_input_state { + struct gpio_event_input_devs *input_devs; + const struct gpio_event_input_info *info; + struct hrtimer timer; + int use_irq; + int debounce_count; + spinlock_t irq_lock; + struct wake_lock wake_lock; + struct gpio_key_state key_state[0]; +}; + +static enum hrtimer_restart gpio_event_input_timer_func(struct hrtimer *timer) +{ + int i; + int pressed; + struct gpio_input_state *ds = + container_of(timer, struct gpio_input_state, timer); + unsigned gpio_flags = ds->info->flags; + unsigned npolarity; + int nkeys = ds->info->keymap_size; + const struct gpio_event_direct_entry *key_entry; + struct gpio_key_state *key_state; + unsigned long irqflags; + uint8_t debounce; + bool sync_needed; + +#if 0 + key_entry = kp->keys_info->keymap; + key_state = kp->key_state; + for (i = 0; i < nkeys; i++, key_entry++, key_state++) + pr_info("gpio_read_detect_status %d %d\n", key_entry->gpio, + gpio_read_detect_status(key_entry->gpio)); +#endif + key_entry = ds->info->keymap; + key_state = ds->key_state; + sync_needed = false; + spin_lock_irqsave(&ds->irq_lock, irqflags); + for (i = 0; i < nkeys; i++, key_entry++, key_state++) { + debounce = key_state->debounce; + if (debounce & DEBOUNCE_WAIT_IRQ) + continue; + if (key_state->debounce & DEBOUNCE_UNSTABLE) { + debounce = key_state->debounce = DEBOUNCE_UNKNOWN; + enable_irq(gpio_to_irq(key_entry->gpio)); + if (gpio_flags & GPIOEDF_PRINT_KEY_UNSTABLE) + pr_info("gpio_keys_scan_keys: key %x-%x, %d " + "(%d) continue debounce\n", + ds->info->type, key_entry->code, + i, key_entry->gpio); + } + npolarity = !(gpio_flags & GPIOEDF_ACTIVE_HIGH); + pressed = gpio_get_value(key_entry->gpio) ^ npolarity; + if (debounce & DEBOUNCE_POLL) { + if (pressed == !(debounce & DEBOUNCE_PRESSED)) { + ds->debounce_count++; + key_state->debounce = DEBOUNCE_UNKNOWN; + if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE) + pr_info("gpio_keys_scan_keys: key %x-" + "%x, %d (%d) start debounce\n", + ds->info->type, key_entry->code, + i, key_entry->gpio); + } + continue; + } + if (pressed && (debounce & DEBOUNCE_NOTPRESSED)) { + if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE) + pr_info("gpio_keys_scan_keys: key %x-%x, %d " + "(%d) debounce pressed 1\n", + ds->info->type, key_entry->code, + i, key_entry->gpio); + key_state->debounce = DEBOUNCE_PRESSED; + continue; + } + if (!pressed && (debounce & DEBOUNCE_PRESSED)) { + if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE) + pr_info("gpio_keys_scan_keys: key %x-%x, %d " + "(%d) debounce pressed 0\n", + ds->info->type, key_entry->code, + i, key_entry->gpio); + key_state->debounce = DEBOUNCE_NOTPRESSED; + continue; + } + /* key is stable */ + ds->debounce_count--; + if (ds->use_irq) + key_state->debounce |= DEBOUNCE_WAIT_IRQ; + else + key_state->debounce |= DEBOUNCE_POLL; + if (gpio_flags & GPIOEDF_PRINT_KEYS) + pr_info("gpio_keys_scan_keys: key %x-%x, %d (%d) " + "changed to %d\n", ds->info->type, + key_entry->code, i, key_entry->gpio, pressed); + input_event(ds->input_devs->dev[key_entry->dev], ds->info->type, + key_entry->code, pressed); + sync_needed = true; + } + if (sync_needed) { + for (i = 0; i < ds->input_devs->count; i++) + input_sync(ds->input_devs->dev[i]); + } + +#if 0 + key_entry = kp->keys_info->keymap; + key_state = kp->key_state; + for (i = 0; i < nkeys; i++, key_entry++, key_state++) { + pr_info("gpio_read_detect_status %d %d\n", key_entry->gpio, + gpio_read_detect_status(key_entry->gpio)); + } +#endif + + if (ds->debounce_count) + hrtimer_start(timer, ds->info->debounce_time, HRTIMER_MODE_REL); + else if (!ds->use_irq) + hrtimer_start(timer, ds->info->poll_time, HRTIMER_MODE_REL); + else + wake_unlock(&ds->wake_lock); + + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + + return HRTIMER_NORESTART; +} + +static irqreturn_t gpio_event_input_irq_handler(int irq, void *dev_id) +{ + struct gpio_key_state *ks = dev_id; + struct gpio_input_state *ds = ks->ds; + int keymap_index = ks - ds->key_state; + const struct gpio_event_direct_entry *key_entry; + unsigned long irqflags; + int pressed; + + if (!ds->use_irq) + return IRQ_HANDLED; + + key_entry = &ds->info->keymap[keymap_index]; + + if (ds->info->debounce_time.tv64) { + spin_lock_irqsave(&ds->irq_lock, irqflags); + if (ks->debounce & DEBOUNCE_WAIT_IRQ) { + ks->debounce = DEBOUNCE_UNKNOWN; + if (ds->debounce_count++ == 0) { + wake_lock(&ds->wake_lock); + hrtimer_start( + &ds->timer, ds->info->debounce_time, + HRTIMER_MODE_REL); + } + if (ds->info->flags & GPIOEDF_PRINT_KEY_DEBOUNCE) + pr_info("gpio_event_input_irq_handler: " + "key %x-%x, %d (%d) start debounce\n", + ds->info->type, key_entry->code, + keymap_index, key_entry->gpio); + } else { + disable_irq_nosync(irq); + ks->debounce = DEBOUNCE_UNSTABLE; + } + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + } else { + pressed = gpio_get_value(key_entry->gpio) ^ + !(ds->info->flags & GPIOEDF_ACTIVE_HIGH); + if (ds->info->flags & GPIOEDF_PRINT_KEYS) + pr_info("gpio_event_input_irq_handler: key %x-%x, %d " + "(%d) changed to %d\n", + ds->info->type, key_entry->code, keymap_index, + key_entry->gpio, pressed); + input_event(ds->input_devs->dev[key_entry->dev], ds->info->type, + key_entry->code, pressed); + input_sync(ds->input_devs->dev[key_entry->dev]); + } + return IRQ_HANDLED; +} + +static int gpio_event_input_request_irqs(struct gpio_input_state *ds) +{ + int i; + int err; + unsigned int irq; + unsigned long req_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; + + for (i = 0; i < ds->info->keymap_size; i++) { + err = irq = gpio_to_irq(ds->info->keymap[i].gpio); + if (err < 0) + goto err_gpio_get_irq_num_failed; + err = request_irq(irq, gpio_event_input_irq_handler, + req_flags, "gpio_keys", &ds->key_state[i]); + if (err) { + pr_err("gpio_event_input_request_irqs: request_irq " + "failed for input %d, irq %d\n", + ds->info->keymap[i].gpio, irq); + goto err_request_irq_failed; + } + if (ds->info->info.no_suspend) { + err = enable_irq_wake(irq); + if (err) { + pr_err("gpio_event_input_request_irqs: " + "enable_irq_wake failed for input %d, " + "irq %d\n", + ds->info->keymap[i].gpio, irq); + goto err_enable_irq_wake_failed; + } + } + } + return 0; + + for (i = ds->info->keymap_size - 1; i >= 0; i--) { + irq = gpio_to_irq(ds->info->keymap[i].gpio); + if (ds->info->info.no_suspend) + disable_irq_wake(irq); +err_enable_irq_wake_failed: + free_irq(irq, &ds->key_state[i]); +err_request_irq_failed: +err_gpio_get_irq_num_failed: + ; + } + return err; +} + +int gpio_event_input_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func) +{ + int ret; + int i; + unsigned long irqflags; + struct gpio_event_input_info *di; + struct gpio_input_state *ds = *data; + + di = container_of(info, struct gpio_event_input_info, info); + + if (func == GPIO_EVENT_FUNC_SUSPEND) { + if (ds->use_irq) + for (i = 0; i < di->keymap_size; i++) + disable_irq(gpio_to_irq(di->keymap[i].gpio)); + hrtimer_cancel(&ds->timer); + return 0; + } + if (func == GPIO_EVENT_FUNC_RESUME) { + spin_lock_irqsave(&ds->irq_lock, irqflags); + if (ds->use_irq) + for (i = 0; i < di->keymap_size; i++) + enable_irq(gpio_to_irq(di->keymap[i].gpio)); + hrtimer_start(&ds->timer, ktime_set(0, 0), HRTIMER_MODE_REL); + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + return 0; + } + + if (func == GPIO_EVENT_FUNC_INIT) { + if (ktime_to_ns(di->poll_time) <= 0) + di->poll_time = ktime_set(0, 20 * NSEC_PER_MSEC); + + *data = ds = kzalloc(sizeof(*ds) + sizeof(ds->key_state[0]) * + di->keymap_size, GFP_KERNEL); + if (ds == NULL) { + ret = -ENOMEM; + pr_err("gpio_event_input_func: " + "Failed to allocate private data\n"); + goto err_ds_alloc_failed; + } + ds->debounce_count = di->keymap_size; + ds->input_devs = input_devs; + ds->info = di; + wake_lock_init(&ds->wake_lock, WAKE_LOCK_SUSPEND, "gpio_input"); + spin_lock_init(&ds->irq_lock); + + for (i = 0; i < di->keymap_size; i++) { + int dev = di->keymap[i].dev; + if (dev >= input_devs->count) { + pr_err("gpio_event_input_func: bad device " + "index %d >= %d for key code %d\n", + dev, input_devs->count, + di->keymap[i].code); + ret = -EINVAL; + goto err_bad_keymap; + } + input_set_capability(input_devs->dev[dev], di->type, + di->keymap[i].code); + ds->key_state[i].ds = ds; + ds->key_state[i].debounce = DEBOUNCE_UNKNOWN; + } + + for (i = 0; i < di->keymap_size; i++) { + ret = gpio_request(di->keymap[i].gpio, "gpio_kp_in"); + if (ret) { + pr_err("gpio_event_input_func: gpio_request " + "failed for %d\n", di->keymap[i].gpio); + goto err_gpio_request_failed; + } + ret = gpio_direction_input(di->keymap[i].gpio); + if (ret) { + pr_err("gpio_event_input_func: " + "gpio_direction_input failed for %d\n", + di->keymap[i].gpio); + goto err_gpio_configure_failed; + } + } + + ret = gpio_event_input_request_irqs(ds); + + spin_lock_irqsave(&ds->irq_lock, irqflags); + ds->use_irq = ret == 0; + + pr_info("GPIO Input Driver: Start gpio inputs for %s%s in %s " + "mode\n", input_devs->dev[0]->name, + (input_devs->count > 1) ? "..." : "", + ret == 0 ? "interrupt" : "polling"); + + hrtimer_init(&ds->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + ds->timer.function = gpio_event_input_timer_func; + hrtimer_start(&ds->timer, ktime_set(0, 0), HRTIMER_MODE_REL); + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + return 0; + } + + ret = 0; + spin_lock_irqsave(&ds->irq_lock, irqflags); + hrtimer_cancel(&ds->timer); + if (ds->use_irq) { + for (i = di->keymap_size - 1; i >= 0; i--) { + int irq = gpio_to_irq(di->keymap[i].gpio); + if (ds->info->info.no_suspend) + disable_irq_wake(irq); + free_irq(irq, &ds->key_state[i]); + } + } + spin_unlock_irqrestore(&ds->irq_lock, irqflags); + + for (i = di->keymap_size - 1; i >= 0; i--) { +err_gpio_configure_failed: + gpio_free(di->keymap[i].gpio); +err_gpio_request_failed: + ; + } +err_bad_keymap: + wake_lock_destroy(&ds->wake_lock); + kfree(ds); +err_ds_alloc_failed: + return ret; +} diff --git a/drivers/input/misc/gpio_matrix.c b/drivers/input/misc/gpio_matrix.c new file mode 100644 index 000000000000..eaa9e89d473a --- /dev/null +++ b/drivers/input/misc/gpio_matrix.c @@ -0,0 +1,441 @@ +/* drivers/input/misc/gpio_matrix.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct gpio_kp { + struct gpio_event_input_devs *input_devs; + struct gpio_event_matrix_info *keypad_info; + struct hrtimer timer; + struct wake_lock wake_lock; + int current_output; + unsigned int use_irq:1; + unsigned int key_state_changed:1; + unsigned int last_key_state_changed:1; + unsigned int some_keys_pressed:2; + unsigned int disabled_irq:1; + unsigned long keys_pressed[0]; +}; + +static void clear_phantom_key(struct gpio_kp *kp, int out, int in) +{ + struct gpio_event_matrix_info *mi = kp->keypad_info; + int key_index = out * mi->ninputs + in; + unsigned short keyentry = mi->keymap[key_index]; + unsigned short keycode = keyentry & MATRIX_KEY_MASK; + unsigned short dev = keyentry >> MATRIX_CODE_BITS; + + if (!test_bit(keycode, kp->input_devs->dev[dev]->key)) { + if (mi->flags & GPIOKPF_PRINT_PHANTOM_KEYS) + pr_info("gpiomatrix: phantom key %x, %d-%d (%d-%d) " + "cleared\n", keycode, out, in, + mi->output_gpios[out], mi->input_gpios[in]); + __clear_bit(key_index, kp->keys_pressed); + } else { + if (mi->flags & GPIOKPF_PRINT_PHANTOM_KEYS) + pr_info("gpiomatrix: phantom key %x, %d-%d (%d-%d) " + "not cleared\n", keycode, out, in, + mi->output_gpios[out], mi->input_gpios[in]); + } +} + +static int restore_keys_for_input(struct gpio_kp *kp, int out, int in) +{ + int rv = 0; + int key_index; + + key_index = out * kp->keypad_info->ninputs + in; + while (out < kp->keypad_info->noutputs) { + if (test_bit(key_index, kp->keys_pressed)) { + rv = 1; + clear_phantom_key(kp, out, in); + } + key_index += kp->keypad_info->ninputs; + out++; + } + return rv; +} + +static void remove_phantom_keys(struct gpio_kp *kp) +{ + int out, in, inp; + int key_index; + + if (kp->some_keys_pressed < 3) + return; + + for (out = 0; out < kp->keypad_info->noutputs; out++) { + inp = -1; + key_index = out * kp->keypad_info->ninputs; + for (in = 0; in < kp->keypad_info->ninputs; in++, key_index++) { + if (test_bit(key_index, kp->keys_pressed)) { + if (inp == -1) { + inp = in; + continue; + } + if (inp >= 0) { + if (!restore_keys_for_input(kp, out + 1, + inp)) + break; + clear_phantom_key(kp, out, inp); + inp = -2; + } + restore_keys_for_input(kp, out, in); + } + } + } +} + +static void report_key(struct gpio_kp *kp, int key_index, int out, int in) +{ + struct gpio_event_matrix_info *mi = kp->keypad_info; + int pressed = test_bit(key_index, kp->keys_pressed); + unsigned short keyentry = mi->keymap[key_index]; + unsigned short keycode = keyentry & MATRIX_KEY_MASK; + unsigned short dev = keyentry >> MATRIX_CODE_BITS; + + if (pressed != test_bit(keycode, kp->input_devs->dev[dev]->key)) { + if (keycode == KEY_RESERVED) { + if (mi->flags & GPIOKPF_PRINT_UNMAPPED_KEYS) + pr_info("gpiomatrix: unmapped key, %d-%d " + "(%d-%d) changed to %d\n", + out, in, mi->output_gpios[out], + mi->input_gpios[in], pressed); + } else { + if (mi->flags & GPIOKPF_PRINT_MAPPED_KEYS) + pr_info("gpiomatrix: key %x, %d-%d (%d-%d) " + "changed to %d\n", keycode, + out, in, mi->output_gpios[out], + mi->input_gpios[in], pressed); + input_report_key(kp->input_devs->dev[dev], keycode, pressed); + } + } +} + +static void report_sync(struct gpio_kp *kp) +{ + int i; + + for (i = 0; i < kp->input_devs->count; i++) + input_sync(kp->input_devs->dev[i]); +} + +static enum hrtimer_restart gpio_keypad_timer_func(struct hrtimer *timer) +{ + int out, in; + int key_index; + int gpio; + struct gpio_kp *kp = container_of(timer, struct gpio_kp, timer); + struct gpio_event_matrix_info *mi = kp->keypad_info; + unsigned gpio_keypad_flags = mi->flags; + unsigned polarity = !!(gpio_keypad_flags & GPIOKPF_ACTIVE_HIGH); + + out = kp->current_output; + if (out == mi->noutputs) { + out = 0; + kp->last_key_state_changed = kp->key_state_changed; + kp->key_state_changed = 0; + kp->some_keys_pressed = 0; + } else { + key_index = out * mi->ninputs; + for (in = 0; in < mi->ninputs; in++, key_index++) { + gpio = mi->input_gpios[in]; + if (gpio_get_value(gpio) ^ !polarity) { + if (kp->some_keys_pressed < 3) + kp->some_keys_pressed++; + kp->key_state_changed |= !__test_and_set_bit( + key_index, kp->keys_pressed); + } else + kp->key_state_changed |= __test_and_clear_bit( + key_index, kp->keys_pressed); + } + gpio = mi->output_gpios[out]; + if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE) + gpio_set_value(gpio, !polarity); + else + gpio_direction_input(gpio); + out++; + } + kp->current_output = out; + if (out < mi->noutputs) { + gpio = mi->output_gpios[out]; + if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE) + gpio_set_value(gpio, polarity); + else + gpio_direction_output(gpio, polarity); + hrtimer_start(timer, mi->settle_time, HRTIMER_MODE_REL); + return HRTIMER_NORESTART; + } + if (gpio_keypad_flags & GPIOKPF_DEBOUNCE) { + if (kp->key_state_changed) { + hrtimer_start(&kp->timer, mi->debounce_delay, + HRTIMER_MODE_REL); + return HRTIMER_NORESTART; + } + kp->key_state_changed = kp->last_key_state_changed; + } + if (kp->key_state_changed) { + if (gpio_keypad_flags & GPIOKPF_REMOVE_SOME_PHANTOM_KEYS) + remove_phantom_keys(kp); + key_index = 0; + for (out = 0; out < mi->noutputs; out++) + for (in = 0; in < mi->ninputs; in++, key_index++) + report_key(kp, key_index, out, in); + report_sync(kp); + } + if (!kp->use_irq || kp->some_keys_pressed) { + hrtimer_start(timer, mi->poll_time, HRTIMER_MODE_REL); + return HRTIMER_NORESTART; + } + + /* No keys are pressed, reenable interrupt */ + for (out = 0; out < mi->noutputs; out++) { + if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE) + gpio_set_value(mi->output_gpios[out], polarity); + else + gpio_direction_output(mi->output_gpios[out], polarity); + } + for (in = 0; in < mi->ninputs; in++) + enable_irq(gpio_to_irq(mi->input_gpios[in])); + wake_unlock(&kp->wake_lock); + return HRTIMER_NORESTART; +} + +static irqreturn_t gpio_keypad_irq_handler(int irq_in, void *dev_id) +{ + int i; + struct gpio_kp *kp = dev_id; + struct gpio_event_matrix_info *mi = kp->keypad_info; + unsigned gpio_keypad_flags = mi->flags; + + if (!kp->use_irq) { + /* ignore interrupt while registering the handler */ + kp->disabled_irq = 1; + disable_irq_nosync(irq_in); + return IRQ_HANDLED; + } + + for (i = 0; i < mi->ninputs; i++) + disable_irq_nosync(gpio_to_irq(mi->input_gpios[i])); + for (i = 0; i < mi->noutputs; i++) { + if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE) + gpio_set_value(mi->output_gpios[i], + !(gpio_keypad_flags & GPIOKPF_ACTIVE_HIGH)); + else + gpio_direction_input(mi->output_gpios[i]); + } + wake_lock(&kp->wake_lock); + hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL); + return IRQ_HANDLED; +} + +static int gpio_keypad_request_irqs(struct gpio_kp *kp) +{ + int i; + int err; + unsigned int irq; + unsigned long request_flags; + struct gpio_event_matrix_info *mi = kp->keypad_info; + + switch (mi->flags & (GPIOKPF_ACTIVE_HIGH|GPIOKPF_LEVEL_TRIGGERED_IRQ)) { + default: + request_flags = IRQF_TRIGGER_FALLING; + break; + case GPIOKPF_ACTIVE_HIGH: + request_flags = IRQF_TRIGGER_RISING; + break; + case GPIOKPF_LEVEL_TRIGGERED_IRQ: + request_flags = IRQF_TRIGGER_LOW; + break; + case GPIOKPF_LEVEL_TRIGGERED_IRQ | GPIOKPF_ACTIVE_HIGH: + request_flags = IRQF_TRIGGER_HIGH; + break; + } + + for (i = 0; i < mi->ninputs; i++) { + err = irq = gpio_to_irq(mi->input_gpios[i]); + if (err < 0) + goto err_gpio_get_irq_num_failed; + err = request_irq(irq, gpio_keypad_irq_handler, request_flags, + "gpio_kp", kp); + if (err) { + pr_err("gpiomatrix: request_irq failed for input %d, " + "irq %d\n", mi->input_gpios[i], irq); + goto err_request_irq_failed; + } + err = enable_irq_wake(irq); + if (err) { + pr_err("gpiomatrix: set_irq_wake failed for input %d, " + "irq %d\n", mi->input_gpios[i], irq); + } + disable_irq(irq); + if (kp->disabled_irq) { + kp->disabled_irq = 0; + enable_irq(irq); + } + } + return 0; + + for (i = mi->noutputs - 1; i >= 0; i--) { + free_irq(gpio_to_irq(mi->input_gpios[i]), kp); +err_request_irq_failed: +err_gpio_get_irq_num_failed: + ; + } + return err; +} + +int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func) +{ + int i; + int err; + int key_count; + struct gpio_kp *kp; + struct gpio_event_matrix_info *mi; + + mi = container_of(info, struct gpio_event_matrix_info, info); + if (func == GPIO_EVENT_FUNC_SUSPEND || func == GPIO_EVENT_FUNC_RESUME) { + /* TODO: disable scanning */ + return 0; + } + + if (func == GPIO_EVENT_FUNC_INIT) { + if (mi->keymap == NULL || + mi->input_gpios == NULL || + mi->output_gpios == NULL) { + err = -ENODEV; + pr_err("gpiomatrix: Incomplete pdata\n"); + goto err_invalid_platform_data; + } + key_count = mi->ninputs * mi->noutputs; + + *data = kp = kzalloc(sizeof(*kp) + sizeof(kp->keys_pressed[0]) * + BITS_TO_LONGS(key_count), GFP_KERNEL); + if (kp == NULL) { + err = -ENOMEM; + pr_err("gpiomatrix: Failed to allocate private data\n"); + goto err_kp_alloc_failed; + } + kp->input_devs = input_devs; + kp->keypad_info = mi; + for (i = 0; i < key_count; i++) { + unsigned short keyentry = mi->keymap[i]; + unsigned short keycode = keyentry & MATRIX_KEY_MASK; + unsigned short dev = keyentry >> MATRIX_CODE_BITS; + if (dev >= input_devs->count) { + pr_err("gpiomatrix: bad device index %d >= " + "%d for key code %d\n", + dev, input_devs->count, keycode); + err = -EINVAL; + goto err_bad_keymap; + } + if (keycode && keycode <= KEY_MAX) + input_set_capability(input_devs->dev[dev], + EV_KEY, keycode); + } + + for (i = 0; i < mi->noutputs; i++) { + err = gpio_request(mi->output_gpios[i], "gpio_kp_out"); + if (err) { + pr_err("gpiomatrix: gpio_request failed for " + "output %d\n", mi->output_gpios[i]); + goto err_request_output_gpio_failed; + } + if (gpio_cansleep(mi->output_gpios[i])) { + pr_err("gpiomatrix: unsupported output gpio %d," + " can sleep\n", mi->output_gpios[i]); + err = -EINVAL; + goto err_output_gpio_configure_failed; + } + if (mi->flags & GPIOKPF_DRIVE_INACTIVE) + err = gpio_direction_output(mi->output_gpios[i], + !(mi->flags & GPIOKPF_ACTIVE_HIGH)); + else + err = gpio_direction_input(mi->output_gpios[i]); + if (err) { + pr_err("gpiomatrix: gpio_configure failed for " + "output %d\n", mi->output_gpios[i]); + goto err_output_gpio_configure_failed; + } + } + for (i = 0; i < mi->ninputs; i++) { + err = gpio_request(mi->input_gpios[i], "gpio_kp_in"); + if (err) { + pr_err("gpiomatrix: gpio_request failed for " + "input %d\n", mi->input_gpios[i]); + goto err_request_input_gpio_failed; + } + err = gpio_direction_input(mi->input_gpios[i]); + if (err) { + pr_err("gpiomatrix: gpio_direction_input failed" + " for input %d\n", mi->input_gpios[i]); + goto err_gpio_direction_input_failed; + } + } + kp->current_output = mi->noutputs; + kp->key_state_changed = 1; + + hrtimer_init(&kp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kp->timer.function = gpio_keypad_timer_func; + wake_lock_init(&kp->wake_lock, WAKE_LOCK_SUSPEND, "gpio_kp"); + err = gpio_keypad_request_irqs(kp); + kp->use_irq = err == 0; + + pr_info("GPIO Matrix Keypad Driver: Start keypad matrix for " + "%s%s in %s mode\n", input_devs->dev[0]->name, + (input_devs->count > 1) ? "..." : "", + kp->use_irq ? "interrupt" : "polling"); + + if (kp->use_irq) + wake_lock(&kp->wake_lock); + hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL); + + return 0; + } + + err = 0; + kp = *data; + + if (kp->use_irq) + for (i = mi->noutputs - 1; i >= 0; i--) + free_irq(gpio_to_irq(mi->input_gpios[i]), kp); + + hrtimer_cancel(&kp->timer); + wake_lock_destroy(&kp->wake_lock); + for (i = mi->noutputs - 1; i >= 0; i--) { +err_gpio_direction_input_failed: + gpio_free(mi->input_gpios[i]); +err_request_input_gpio_failed: + ; + } + for (i = mi->noutputs - 1; i >= 0; i--) { +err_output_gpio_configure_failed: + gpio_free(mi->output_gpios[i]); +err_request_output_gpio_failed: + ; + } +err_bad_keymap: + kfree(kp); +err_kp_alloc_failed: +err_invalid_platform_data: + return err; +} diff --git a/drivers/input/misc/gpio_output.c b/drivers/input/misc/gpio_output.c new file mode 100644 index 000000000000..2aac2fad0a17 --- /dev/null +++ b/drivers/input/misc/gpio_output.c @@ -0,0 +1,97 @@ +/* drivers/input/misc/gpio_output.c + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include + +int gpio_event_output_event( + struct gpio_event_input_devs *input_devs, struct gpio_event_info *info, + void **data, unsigned int dev, unsigned int type, + unsigned int code, int value) +{ + int i; + struct gpio_event_output_info *oi; + oi = container_of(info, struct gpio_event_output_info, info); + if (type != oi->type) + return 0; + if (!(oi->flags & GPIOEDF_ACTIVE_HIGH)) + value = !value; + for (i = 0; i < oi->keymap_size; i++) + if (dev == oi->keymap[i].dev && code == oi->keymap[i].code) + gpio_set_value(oi->keymap[i].gpio, value); + return 0; +} + +int gpio_event_output_func( + struct gpio_event_input_devs *input_devs, struct gpio_event_info *info, + void **data, int func) +{ + int ret; + int i; + struct gpio_event_output_info *oi; + oi = container_of(info, struct gpio_event_output_info, info); + + if (func == GPIO_EVENT_FUNC_SUSPEND || func == GPIO_EVENT_FUNC_RESUME) + return 0; + + if (func == GPIO_EVENT_FUNC_INIT) { + int output_level = !(oi->flags & GPIOEDF_ACTIVE_HIGH); + + for (i = 0; i < oi->keymap_size; i++) { + int dev = oi->keymap[i].dev; + if (dev >= input_devs->count) { + pr_err("gpio_event_output_func: bad device " + "index %d >= %d for key code %d\n", + dev, input_devs->count, + oi->keymap[i].code); + ret = -EINVAL; + goto err_bad_keymap; + } + input_set_capability(input_devs->dev[dev], oi->type, + oi->keymap[i].code); + } + + for (i = 0; i < oi->keymap_size; i++) { + ret = gpio_request(oi->keymap[i].gpio, + "gpio_event_output"); + if (ret) { + pr_err("gpio_event_output_func: gpio_request " + "failed for %d\n", oi->keymap[i].gpio); + goto err_gpio_request_failed; + } + ret = gpio_direction_output(oi->keymap[i].gpio, + output_level); + if (ret) { + pr_err("gpio_event_output_func: " + "gpio_direction_output failed for %d\n", + oi->keymap[i].gpio); + goto err_gpio_direction_output_failed; + } + } + return 0; + } + + ret = 0; + for (i = oi->keymap_size - 1; i >= 0; i--) { +err_gpio_direction_output_failed: + gpio_free(oi->keymap[i].gpio); +err_gpio_request_failed: + ; + } +err_bad_keymap: + return ret; +} + diff --git a/include/linux/gpio_event.h b/include/linux/gpio_event.h new file mode 100644 index 000000000000..2613fc5e4a93 --- /dev/null +++ b/include/linux/gpio_event.h @@ -0,0 +1,170 @@ +/* include/linux/gpio_event.h + * + * Copyright (C) 2007 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_GPIO_EVENT_H +#define _LINUX_GPIO_EVENT_H + +#include + +struct gpio_event_input_devs { + int count; + struct input_dev *dev[]; +}; +enum { + GPIO_EVENT_FUNC_UNINIT = 0x0, + GPIO_EVENT_FUNC_INIT = 0x1, + GPIO_EVENT_FUNC_SUSPEND = 0x2, + GPIO_EVENT_FUNC_RESUME = 0x3, +}; +struct gpio_event_info { + int (*func)(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, + void **data, int func); + int (*event)(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, + void **data, unsigned int dev, unsigned int type, + unsigned int code, int value); /* out events */ + bool no_suspend; +}; + +struct gpio_event_platform_data { + const char *name; + struct gpio_event_info **info; + size_t info_count; + int (*power)(const struct gpio_event_platform_data *pdata, bool on); + const char *names[]; /* If name is NULL, names contain a NULL */ + /* terminated list of input devices to create */ +}; + +#define GPIO_EVENT_DEV_NAME "gpio-event" + +/* Key matrix */ + +enum gpio_event_matrix_flags { + /* unset: drive active output low, set: drive active output high */ + GPIOKPF_ACTIVE_HIGH = 1U << 0, + GPIOKPF_DEBOUNCE = 1U << 1, + GPIOKPF_REMOVE_SOME_PHANTOM_KEYS = 1U << 2, + GPIOKPF_REMOVE_PHANTOM_KEYS = GPIOKPF_REMOVE_SOME_PHANTOM_KEYS | + GPIOKPF_DEBOUNCE, + GPIOKPF_DRIVE_INACTIVE = 1U << 3, + GPIOKPF_LEVEL_TRIGGERED_IRQ = 1U << 4, + GPIOKPF_PRINT_UNMAPPED_KEYS = 1U << 16, + GPIOKPF_PRINT_MAPPED_KEYS = 1U << 17, + GPIOKPF_PRINT_PHANTOM_KEYS = 1U << 18, +}; + +#define MATRIX_CODE_BITS (10) +#define MATRIX_KEY_MASK ((1U << MATRIX_CODE_BITS) - 1) +#define MATRIX_KEY(dev, code) \ + (((dev) << MATRIX_CODE_BITS) | (code & MATRIX_KEY_MASK)) + +extern int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func); +struct gpio_event_matrix_info { + /* initialize to gpio_event_matrix_func */ + struct gpio_event_info info; + /* size must be ninputs * noutputs */ + const unsigned short *keymap; + unsigned int *input_gpios; + unsigned int *output_gpios; + unsigned int ninputs; + unsigned int noutputs; + /* time to wait before reading inputs after driving each output */ + ktime_t settle_time; + /* time to wait before scanning the keypad a second time */ + ktime_t debounce_delay; + ktime_t poll_time; + unsigned flags; +}; + +/* Directly connected inputs and outputs */ + +enum gpio_event_direct_flags { + GPIOEDF_ACTIVE_HIGH = 1U << 0, +/* GPIOEDF_USE_DOWN_IRQ = 1U << 1, */ +/* GPIOEDF_USE_IRQ = (1U << 2) | GPIOIDF_USE_DOWN_IRQ, */ + GPIOEDF_PRINT_KEYS = 1U << 8, + GPIOEDF_PRINT_KEY_DEBOUNCE = 1U << 9, + GPIOEDF_PRINT_KEY_UNSTABLE = 1U << 10, +}; + +struct gpio_event_direct_entry { + uint32_t gpio:16; + uint32_t code:10; + uint32_t dev:6; +}; + +/* inputs */ +extern int gpio_event_input_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func); +struct gpio_event_input_info { + /* initialize to gpio_event_input_func */ + struct gpio_event_info info; + ktime_t debounce_time; + ktime_t poll_time; + uint16_t flags; + uint16_t type; + const struct gpio_event_direct_entry *keymap; + size_t keymap_size; +}; + +/* outputs */ +extern int gpio_event_output_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func); +extern int gpio_event_output_event(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, + unsigned int dev, unsigned int type, + unsigned int code, int value); +struct gpio_event_output_info { + /* initialize to gpio_event_output_func and gpio_event_output_event */ + struct gpio_event_info info; + uint16_t flags; + uint16_t type; + const struct gpio_event_direct_entry *keymap; + size_t keymap_size; +}; + + +/* axes */ + +enum gpio_event_axis_flags { + GPIOEAF_PRINT_UNKNOWN_DIRECTION = 1U << 16, + GPIOEAF_PRINT_RAW = 1U << 17, + GPIOEAF_PRINT_EVENT = 1U << 18, +}; + +extern int gpio_event_axis_func(struct gpio_event_input_devs *input_devs, + struct gpio_event_info *info, void **data, int func); +struct gpio_event_axis_info { + /* initialize to gpio_event_axis_func */ + struct gpio_event_info info; + uint8_t count; /* number of gpios for this axis */ + uint8_t dev; /* device index when using multiple input devices */ + uint8_t type; /* EV_REL or EV_ABS */ + uint16_t code; + uint16_t decoded_size; + uint16_t (*map)(struct gpio_event_axis_info *info, uint16_t in); + uint32_t *gpio; + uint32_t flags; +}; +#define gpio_axis_2bit_gray_map gpio_axis_4bit_gray_map +#define gpio_axis_3bit_gray_map gpio_axis_4bit_gray_map +uint16_t gpio_axis_4bit_gray_map( + struct gpio_event_axis_info *info, uint16_t in); +uint16_t gpio_axis_5bit_singletrack_map( + struct gpio_event_axis_info *info, uint16_t in); + +#endif -- GitLab From 1c7059889e0bc4fd7289b879e76384ef0cb8d065 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 1 Feb 2012 20:26:28 -0800 Subject: [PATCH 0598/1442] ANDROID: input: misc: gpio_event: remove early suspend Remove the early suspend handler. Leave the suspend functions for now, they should eventually get called through a userspace interface.x Change-Id: I67f9dafe32fe32577bab93c42b95824db96c215c Signed-off-by: Colin Cross --- drivers/input/misc/gpio_event.c | 39 ++++++++------------------------- 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/drivers/input/misc/gpio_event.c b/drivers/input/misc/gpio_event.c index d4e5b4dfe19f..90f07eba3ce9 100644 --- a/drivers/input/misc/gpio_event.c +++ b/drivers/input/misc/gpio_event.c @@ -13,7 +13,6 @@ * */ -#include #include #include #include @@ -24,7 +23,6 @@ struct gpio_event { struct gpio_event_input_devs *input_devs; const struct gpio_event_platform_data *info; - struct early_suspend early_suspend; void *state[0]; }; @@ -101,23 +99,19 @@ static int gpio_event_call_all_func(struct gpio_event *ip, int func) return ret; } -#ifdef CONFIG_HAS_EARLYSUSPEND -void gpio_event_suspend(struct early_suspend *h) +static void __maybe_unused gpio_event_suspend(struct gpio_event *ip) { - struct gpio_event *ip; - ip = container_of(h, struct gpio_event, early_suspend); gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_SUSPEND); - ip->info->power(ip->info, 0); + if (ip->info->power) + ip->info->power(ip->info, 0); } -void gpio_event_resume(struct early_suspend *h) +static void __maybe_unused gpio_event_resume(struct gpio_event *ip) { - struct gpio_event *ip; - ip = container_of(h, struct gpio_event, early_suspend); - ip->info->power(ip->info, 1); + if (ip->info->power) + ip->info->power(ip->info, 1); gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_RESUME); } -#endif static int gpio_event_probe(struct platform_device *pdev) { @@ -169,15 +163,8 @@ static int gpio_event_probe(struct platform_device *pdev) } ip->input_devs->count = dev_count; ip->info = event_info; - if (event_info->power) { -#ifdef CONFIG_HAS_EARLYSUSPEND - ip->early_suspend.level = EARLY_SUSPEND_LEVEL_BLANK_SCREEN + 1; - ip->early_suspend.suspend = gpio_event_suspend; - ip->early_suspend.resume = gpio_event_resume; - register_early_suspend(&ip->early_suspend); -#endif + if (event_info->power) ip->info->power(ip->info, 1); - } err = gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_INIT); if (err) @@ -198,12 +185,8 @@ static int gpio_event_probe(struct platform_device *pdev) err_input_register_device_failed: gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT); err_call_all_func_failed: - if (event_info->power) { -#ifdef CONFIG_HAS_EARLYSUSPEND - unregister_early_suspend(&ip->early_suspend); -#endif + if (event_info->power) ip->info->power(ip->info, 0); - } for (i = 0; i < registered; i++) input_unregister_device(ip->input_devs->dev[i]); for (i = dev_count - 1; i >= registered; i--) { @@ -222,12 +205,8 @@ static int gpio_event_remove(struct platform_device *pdev) int i; gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT); - if (ip->info->power) { -#ifdef CONFIG_HAS_EARLYSUSPEND - unregister_early_suspend(&ip->early_suspend); -#endif + if (ip->info->power) ip->info->power(ip->info, 0); - } for (i = 0; i < ip->input_devs->count; i++) input_unregister_device(ip->input_devs->dev[i]); kfree(ip); -- GitLab From b172e89d374a3e151473ed5caf04e298107103a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Fri, 21 Nov 2008 21:47:23 -0800 Subject: [PATCH 0599/1442] ANDROID: input: Add keyreset driver. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a platform device in the board file to specify a reset key-combo. The first time the key-combo is detected a work function that syncs the filesystems is scheduled. If all the keys are released and then pressed again, it calls panic. Reboot on panic should be set for this to work. Change-Id: I9d54283ca1fba45e4b1ae1a407524cdda8171143 Signed-off-by: Arve Hjønnevåg --- drivers/input/Kconfig | 9 ++ drivers/input/Makefile | 1 + drivers/input/keyreset.c | 239 +++++++++++++++++++++++++++++++++++++++ include/linux/keyreset.h | 28 +++++ 4 files changed, 277 insertions(+) create mode 100644 drivers/input/keyreset.c create mode 100644 include/linux/keyreset.h diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 6261874c07c9..525ed3a35fe0 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -187,6 +187,15 @@ config INPUT_APMPOWER To compile this driver as a module, choose M here: the module will be called apm-power. +config INPUT_KEYRESET + tristate "Reset key" + depends on INPUT + ---help--- + Say Y here if you want to reboot when some keys are pressed; + + To compile this driver as a module, choose M here: the + module will be called keyreset. + comment "Input Device Drivers" source "drivers/input/keyboard/Kconfig" diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 595820bbabe9..f51b8e336ae1 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -26,5 +26,6 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/ obj-$(CONFIG_INPUT_MISC) += misc/ obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o +obj-$(CONFIG_INPUT_KEYRESET) += keyreset.o obj-$(CONFIG_RMI4_CORE) += rmi4/ diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c new file mode 100644 index 000000000000..36208fe0baae --- /dev/null +++ b/drivers/input/keyreset.c @@ -0,0 +1,239 @@ +/* drivers/input/keyreset.c + * + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +struct keyreset_state { + struct input_handler input_handler; + unsigned long keybit[BITS_TO_LONGS(KEY_CNT)]; + unsigned long upbit[BITS_TO_LONGS(KEY_CNT)]; + unsigned long key[BITS_TO_LONGS(KEY_CNT)]; + spinlock_t lock; + int key_down_target; + int key_down; + int key_up; + int restart_disabled; + int (*reset_fn)(void); +}; + +int restart_requested; +static void deferred_restart(struct work_struct *dummy) +{ + restart_requested = 2; + sys_sync(); + restart_requested = 3; + kernel_restart(NULL); +} +static DECLARE_WORK(restart_work, deferred_restart); + +static void keyreset_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + unsigned long flags; + struct keyreset_state *state = handle->private; + + if (type != EV_KEY) + return; + + if (code >= KEY_MAX) + return; + + if (!test_bit(code, state->keybit)) + return; + + spin_lock_irqsave(&state->lock, flags); + if (!test_bit(code, state->key) == !value) + goto done; + __change_bit(code, state->key); + if (test_bit(code, state->upbit)) { + if (value) { + state->restart_disabled = 1; + state->key_up++; + } else + state->key_up--; + } else { + if (value) + state->key_down++; + else + state->key_down--; + } + if (state->key_down == 0 && state->key_up == 0) + state->restart_disabled = 0; + + pr_debug("reset key changed %d %d new state %d-%d-%d\n", code, value, + state->key_down, state->key_up, state->restart_disabled); + + if (value && !state->restart_disabled && + state->key_down == state->key_down_target) { + state->restart_disabled = 1; + if (restart_requested) + panic("keyboard reset failed, %d", restart_requested); + if (state->reset_fn) { + restart_requested = state->reset_fn(); + } else { + pr_info("keyboard reset\n"); + schedule_work(&restart_work); + restart_requested = 1; + } + } +done: + spin_unlock_irqrestore(&state->lock, flags); +} + +static int keyreset_connect(struct input_handler *handler, + struct input_dev *dev, + const struct input_device_id *id) +{ + int i; + int ret; + struct input_handle *handle; + struct keyreset_state *state = + container_of(handler, struct keyreset_state, input_handler); + + for (i = 0; i < KEY_MAX; i++) { + if (test_bit(i, state->keybit) && test_bit(i, dev->keybit)) + break; + } + if (i == KEY_MAX) + return -ENODEV; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = "keyreset"; + handle->private = state; + + ret = input_register_handle(handle); + if (ret) + goto err_input_register_handle; + + ret = input_open_device(handle); + if (ret) + goto err_input_open_device; + + pr_info("using input dev %s for key reset\n", dev->name); + + return 0; + +err_input_open_device: + input_unregister_handle(handle); +err_input_register_handle: + kfree(handle); + return ret; +} + +static void keyreset_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static const struct input_device_id keyreset_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_KEY) }, + }, + { }, +}; +MODULE_DEVICE_TABLE(input, keyreset_ids); + +static int keyreset_probe(struct platform_device *pdev) +{ + int ret; + int key, *keyp; + struct keyreset_state *state; + struct keyreset_platform_data *pdata = pdev->dev.platform_data; + + if (!pdata) + return -EINVAL; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + spin_lock_init(&state->lock); + keyp = pdata->keys_down; + while ((key = *keyp++)) { + if (key >= KEY_MAX) + continue; + state->key_down_target++; + __set_bit(key, state->keybit); + } + if (pdata->keys_up) { + keyp = pdata->keys_up; + while ((key = *keyp++)) { + if (key >= KEY_MAX) + continue; + __set_bit(key, state->keybit); + __set_bit(key, state->upbit); + } + } + + if (pdata->reset_fn) + state->reset_fn = pdata->reset_fn; + + state->input_handler.event = keyreset_event; + state->input_handler.connect = keyreset_connect; + state->input_handler.disconnect = keyreset_disconnect; + state->input_handler.name = KEYRESET_NAME; + state->input_handler.id_table = keyreset_ids; + ret = input_register_handler(&state->input_handler); + if (ret) { + kfree(state); + return ret; + } + platform_set_drvdata(pdev, state); + return 0; +} + +int keyreset_remove(struct platform_device *pdev) +{ + struct keyreset_state *state = platform_get_drvdata(pdev); + input_unregister_handler(&state->input_handler); + kfree(state); + return 0; +} + + +struct platform_driver keyreset_driver = { + .driver.name = KEYRESET_NAME, + .probe = keyreset_probe, + .remove = keyreset_remove, +}; + +static int __init keyreset_init(void) +{ + return platform_driver_register(&keyreset_driver); +} + +static void __exit keyreset_exit(void) +{ + return platform_driver_unregister(&keyreset_driver); +} + +module_init(keyreset_init); +module_exit(keyreset_exit); diff --git a/include/linux/keyreset.h b/include/linux/keyreset.h new file mode 100644 index 000000000000..a2ac49e5b684 --- /dev/null +++ b/include/linux/keyreset.h @@ -0,0 +1,28 @@ +/* + * include/linux/keyreset.h - platform data structure for resetkeys driver + * + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_KEYRESET_H +#define _LINUX_KEYRESET_H + +#define KEYRESET_NAME "keyreset" + +struct keyreset_platform_data { + int (*reset_fn)(void); + int *keys_up; + int keys_down[]; /* 0 terminated */ +}; + +#endif /* _LINUX_KEYRESET_H */ -- GitLab From 630a1e7f13fa870d27da61650f007f9b7e69d9e3 Mon Sep 17 00:00:00 2001 From: Mike Lockwood Date: Mon, 15 Dec 2008 14:51:56 -0500 Subject: [PATCH 0600/1442] ANDROID: input: keychord: Add keychord driver This driver allows userspace to receive notification when client specified key combinations are pressed. The client opens /dev/keychord and writes a list of keychords for the driver to monitor. The client then reads or polls /dev/keychord for notifications. A client specified ID for the keychord is returned from read() when a keychord press is detected. Signed-off-by: Mike Lockwood keychord: fix to build without CONFIG_PREEMPT Change-Id: I911f13aeda4224b6fa57863bc7e8972fec8837fb --- drivers/input/misc/Kconfig | 11 + drivers/input/misc/Makefile | 1 + drivers/input/misc/keychord.c | 387 ++++++++++++++++++++++++++++++++++ include/linux/keychord.h | 52 +++++ 4 files changed, 451 insertions(+) create mode 100644 drivers/input/misc/keychord.c create mode 100644 include/linux/keychord.h diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index ea1b5e441a96..94360fe63f41 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -367,6 +367,17 @@ config INPUT_ATI_REMOTE2 To compile this driver as a module, choose M here: the module will be called ati_remote2. +config INPUT_KEYCHORD + tristate "Key chord input driver support" + help + Say Y here if you want to enable the key chord driver + accessible at /dev/keychord. This driver can be used + for receiving notifications when client specified key + combinations are pressed. + + To compile this driver as a module, choose M here: the + module will be called keychord. + config INPUT_KEYSPAN_REMOTE tristate "Keyspan DMR USB remote control" depends on USB_ARCH_HAS_HCD diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 13bf7dbba627..64bf231faf8c 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_INPUT_HISI_POWERKEY) += hisi_powerkey.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o obj-$(CONFIG_INPUT_IMS_PCU) += ims-pcu.o obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o +obj-$(CONFIG_INPUT_KEYCHORD) += keychord.o obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o diff --git a/drivers/input/misc/keychord.c b/drivers/input/misc/keychord.c new file mode 100644 index 000000000000..3ffab6da411b --- /dev/null +++ b/drivers/input/misc/keychord.c @@ -0,0 +1,387 @@ +/* + * drivers/input/misc/keychord.c + * + * Copyright (C) 2008 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KEYCHORD_NAME "keychord" +#define BUFFER_SIZE 16 + +MODULE_AUTHOR("Mike Lockwood "); +MODULE_DESCRIPTION("Key chord input driver"); +MODULE_SUPPORTED_DEVICE("keychord"); +MODULE_LICENSE("GPL"); + +#define NEXT_KEYCHORD(kc) ((struct input_keychord *) \ + ((char *)kc + sizeof(struct input_keychord) + \ + kc->count * sizeof(kc->keycodes[0]))) + +struct keychord_device { + struct input_handler input_handler; + int registered; + + /* list of keychords to monitor */ + struct input_keychord *keychords; + int keychord_count; + + /* bitmask of keys contained in our keychords */ + unsigned long keybit[BITS_TO_LONGS(KEY_CNT)]; + /* current state of the keys */ + unsigned long keystate[BITS_TO_LONGS(KEY_CNT)]; + /* number of keys that are currently pressed */ + int key_down; + + /* second input_device_id is needed for null termination */ + struct input_device_id device_ids[2]; + + spinlock_t lock; + wait_queue_head_t waitq; + unsigned char head; + unsigned char tail; + __u16 buff[BUFFER_SIZE]; +}; + +static int check_keychord(struct keychord_device *kdev, + struct input_keychord *keychord) +{ + int i; + + if (keychord->count != kdev->key_down) + return 0; + + for (i = 0; i < keychord->count; i++) { + if (!test_bit(keychord->keycodes[i], kdev->keystate)) + return 0; + } + + /* we have a match */ + return 1; +} + +static void keychord_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + struct keychord_device *kdev = handle->private; + struct input_keychord *keychord; + unsigned long flags; + int i, got_chord = 0; + + if (type != EV_KEY || code >= KEY_MAX) + return; + + spin_lock_irqsave(&kdev->lock, flags); + /* do nothing if key state did not change */ + if (!test_bit(code, kdev->keystate) == !value) + goto done; + __change_bit(code, kdev->keystate); + if (value) + kdev->key_down++; + else + kdev->key_down--; + + /* don't notify on key up */ + if (!value) + goto done; + /* ignore this event if it is not one of the keys we are monitoring */ + if (!test_bit(code, kdev->keybit)) + goto done; + + keychord = kdev->keychords; + if (!keychord) + goto done; + + /* check to see if the keyboard state matches any keychords */ + for (i = 0; i < kdev->keychord_count; i++) { + if (check_keychord(kdev, keychord)) { + kdev->buff[kdev->head] = keychord->id; + kdev->head = (kdev->head + 1) % BUFFER_SIZE; + got_chord = 1; + break; + } + /* skip to next keychord */ + keychord = NEXT_KEYCHORD(keychord); + } + +done: + spin_unlock_irqrestore(&kdev->lock, flags); + + if (got_chord) + wake_up_interruptible(&kdev->waitq); +} + +static int keychord_connect(struct input_handler *handler, + struct input_dev *dev, + const struct input_device_id *id) +{ + int i, ret; + struct input_handle *handle; + struct keychord_device *kdev = + container_of(handler, struct keychord_device, input_handler); + + /* + * ignore this input device if it does not contain any keycodes + * that we are monitoring + */ + for (i = 0; i < KEY_MAX; i++) { + if (test_bit(i, kdev->keybit) && test_bit(i, dev->keybit)) + break; + } + if (i == KEY_MAX) + return -ENODEV; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = KEYCHORD_NAME; + handle->private = kdev; + + ret = input_register_handle(handle); + if (ret) + goto err_input_register_handle; + + ret = input_open_device(handle); + if (ret) + goto err_input_open_device; + + pr_info("keychord: using input dev %s for fevent\n", dev->name); + + return 0; + +err_input_open_device: + input_unregister_handle(handle); +err_input_register_handle: + kfree(handle); + return ret; +} + +static void keychord_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +/* + * keychord_read is used to read keychord events from the driver + */ +static ssize_t keychord_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct keychord_device *kdev = file->private_data; + __u16 id; + int retval; + unsigned long flags; + + if (count < sizeof(id)) + return -EINVAL; + count = sizeof(id); + + if (kdev->head == kdev->tail && (file->f_flags & O_NONBLOCK)) + return -EAGAIN; + + retval = wait_event_interruptible(kdev->waitq, + kdev->head != kdev->tail); + if (retval) + return retval; + + spin_lock_irqsave(&kdev->lock, flags); + /* pop a keychord ID off the queue */ + id = kdev->buff[kdev->tail]; + kdev->tail = (kdev->tail + 1) % BUFFER_SIZE; + spin_unlock_irqrestore(&kdev->lock, flags); + + if (copy_to_user(buffer, &id, count)) + return -EFAULT; + + return count; +} + +/* + * keychord_write is used to configure the driver + */ +static ssize_t keychord_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct keychord_device *kdev = file->private_data; + struct input_keychord *keychords = 0; + struct input_keychord *keychord, *next, *end; + int ret, i, key; + unsigned long flags; + + if (count < sizeof(struct input_keychord)) + return -EINVAL; + keychords = kzalloc(count, GFP_KERNEL); + if (!keychords) + return -ENOMEM; + + /* read list of keychords from userspace */ + if (copy_from_user(keychords, buffer, count)) { + kfree(keychords); + return -EFAULT; + } + + /* unregister handler before changing configuration */ + if (kdev->registered) { + input_unregister_handler(&kdev->input_handler); + kdev->registered = 0; + } + + spin_lock_irqsave(&kdev->lock, flags); + /* clear any existing configuration */ + kfree(kdev->keychords); + kdev->keychords = 0; + kdev->keychord_count = 0; + kdev->key_down = 0; + memset(kdev->keybit, 0, sizeof(kdev->keybit)); + memset(kdev->keystate, 0, sizeof(kdev->keystate)); + kdev->head = kdev->tail = 0; + + keychord = keychords; + end = (struct input_keychord *)((char *)keychord + count); + + while (keychord < end) { + next = NEXT_KEYCHORD(keychord); + if (keychord->count <= 0 || next > end) { + pr_err("keychord: invalid keycode count %d\n", + keychord->count); + goto err_unlock_return; + } + if (keychord->version != KEYCHORD_VERSION) { + pr_err("keychord: unsupported version %d\n", + keychord->version); + goto err_unlock_return; + } + + /* keep track of the keys we are monitoring in keybit */ + for (i = 0; i < keychord->count; i++) { + key = keychord->keycodes[i]; + if (key < 0 || key >= KEY_CNT) { + pr_err("keychord: keycode %d out of range\n", + key); + goto err_unlock_return; + } + __set_bit(key, kdev->keybit); + } + + kdev->keychord_count++; + keychord = next; + } + + kdev->keychords = keychords; + spin_unlock_irqrestore(&kdev->lock, flags); + + ret = input_register_handler(&kdev->input_handler); + if (ret) { + kfree(keychords); + kdev->keychords = 0; + return ret; + } + kdev->registered = 1; + + return count; + +err_unlock_return: + spin_unlock_irqrestore(&kdev->lock, flags); + kfree(keychords); + return -EINVAL; +} + +static unsigned int keychord_poll(struct file *file, poll_table *wait) +{ + struct keychord_device *kdev = file->private_data; + + poll_wait(file, &kdev->waitq, wait); + + if (kdev->head != kdev->tail) + return POLLIN | POLLRDNORM; + + return 0; +} + +static int keychord_open(struct inode *inode, struct file *file) +{ + struct keychord_device *kdev; + + kdev = kzalloc(sizeof(struct keychord_device), GFP_KERNEL); + if (!kdev) + return -ENOMEM; + + spin_lock_init(&kdev->lock); + init_waitqueue_head(&kdev->waitq); + + kdev->input_handler.event = keychord_event; + kdev->input_handler.connect = keychord_connect; + kdev->input_handler.disconnect = keychord_disconnect; + kdev->input_handler.name = KEYCHORD_NAME; + kdev->input_handler.id_table = kdev->device_ids; + + kdev->device_ids[0].flags = INPUT_DEVICE_ID_MATCH_EVBIT; + __set_bit(EV_KEY, kdev->device_ids[0].evbit); + + file->private_data = kdev; + + return 0; +} + +static int keychord_release(struct inode *inode, struct file *file) +{ + struct keychord_device *kdev = file->private_data; + + if (kdev->registered) + input_unregister_handler(&kdev->input_handler); + kfree(kdev); + + return 0; +} + +static const struct file_operations keychord_fops = { + .owner = THIS_MODULE, + .open = keychord_open, + .release = keychord_release, + .read = keychord_read, + .write = keychord_write, + .poll = keychord_poll, +}; + +static struct miscdevice keychord_misc = { + .fops = &keychord_fops, + .name = KEYCHORD_NAME, + .minor = MISC_DYNAMIC_MINOR, +}; + +static int __init keychord_init(void) +{ + return misc_register(&keychord_misc); +} + +static void __exit keychord_exit(void) +{ + misc_deregister(&keychord_misc); +} + +module_init(keychord_init); +module_exit(keychord_exit); diff --git a/include/linux/keychord.h b/include/linux/keychord.h new file mode 100644 index 000000000000..856a5850217b --- /dev/null +++ b/include/linux/keychord.h @@ -0,0 +1,52 @@ +/* + * Key chord input driver + * + * Copyright (C) 2008 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * +*/ + +#ifndef __LINUX_KEYCHORD_H_ +#define __LINUX_KEYCHORD_H_ + +#include + +#define KEYCHORD_VERSION 1 + +/* + * One or more input_keychord structs are written to /dev/keychord + * at once to specify the list of keychords to monitor. + * Reading /dev/keychord returns the id of a keychord when the + * keychord combination is pressed. A keychord is signalled when + * all of the keys in the keycode list are in the pressed state. + * The order in which the keys are pressed does not matter. + * The keychord will not be signalled if keys not in the keycode + * list are pressed. + * Keychords will not be signalled on key release events. + */ +struct input_keychord { + /* should be KEYCHORD_VERSION */ + __u16 version; + /* + * client specified ID, returned from read() + * when this keychord is pressed. + */ + __u16 id; + + /* number of keycodes in this keychord */ + __u16 count; + + /* variable length array of keycodes */ + __u16 keycodes[]; +}; + +#endif /* __LINUX_KEYCHORD_H_ */ -- GitLab From c4be12ace11bc41efcb95dd836a692fd4f55e663 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 5 Mar 2013 14:25:36 -0800 Subject: [PATCH 0601/1442] ANDROID: input: misc: keychord: log when keychord triggered log keychord id at info level just before waking up processes. Signed-off-by: JP Abgrall --- drivers/input/misc/keychord.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/misc/keychord.c b/drivers/input/misc/keychord.c index 3ffab6da411b..a5ea27ad0e16 100644 --- a/drivers/input/misc/keychord.c +++ b/drivers/input/misc/keychord.c @@ -126,8 +126,12 @@ static void keychord_event(struct input_handle *handle, unsigned int type, done: spin_unlock_irqrestore(&kdev->lock, flags); - if (got_chord) + if (got_chord) { + pr_info("keychord: got keychord id %d. Any tasks: %d\n", + keychord->id, + !list_empty_careful(&kdev->waitq.task_list)); wake_up_interruptible(&kdev->waitq); + } } static int keychord_connect(struct input_handler *handler, -- GitLab From f4d1cf1208fee98695ec39b46ff54c7274282dc5 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 7 Nov 2013 12:46:33 -0800 Subject: [PATCH 0602/1442] ANDROID: input: misc: keychord: move header to uapi Move the entire contents of linux/keychord.h header to uapi, it only contains a userspace interface. Change-Id: If94f83328b19efb58c66391dce3bd8e927788d8d Signed-off-by: Colin Cross --- include/linux/keychord.h | 31 +-------------------- include/uapi/linux/keychord.h | 52 +++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 30 deletions(-) create mode 100644 include/uapi/linux/keychord.h diff --git a/include/linux/keychord.h b/include/linux/keychord.h index 856a5850217b..08cf5402102c 100644 --- a/include/linux/keychord.h +++ b/include/linux/keychord.h @@ -18,35 +18,6 @@ #ifndef __LINUX_KEYCHORD_H_ #define __LINUX_KEYCHORD_H_ -#include - -#define KEYCHORD_VERSION 1 - -/* - * One or more input_keychord structs are written to /dev/keychord - * at once to specify the list of keychords to monitor. - * Reading /dev/keychord returns the id of a keychord when the - * keychord combination is pressed. A keychord is signalled when - * all of the keys in the keycode list are in the pressed state. - * The order in which the keys are pressed does not matter. - * The keychord will not be signalled if keys not in the keycode - * list are pressed. - * Keychords will not be signalled on key release events. - */ -struct input_keychord { - /* should be KEYCHORD_VERSION */ - __u16 version; - /* - * client specified ID, returned from read() - * when this keychord is pressed. - */ - __u16 id; - - /* number of keycodes in this keychord */ - __u16 count; - - /* variable length array of keycodes */ - __u16 keycodes[]; -}; +#include #endif /* __LINUX_KEYCHORD_H_ */ diff --git a/include/uapi/linux/keychord.h b/include/uapi/linux/keychord.h new file mode 100644 index 000000000000..ea7cf4d27bbd --- /dev/null +++ b/include/uapi/linux/keychord.h @@ -0,0 +1,52 @@ +/* + * Key chord input driver + * + * Copyright (C) 2008 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * +*/ + +#ifndef _UAPI_LINUX_KEYCHORD_H_ +#define _UAPI_LINUX_KEYCHORD_H_ + +#include + +#define KEYCHORD_VERSION 1 + +/* + * One or more input_keychord structs are written to /dev/keychord + * at once to specify the list of keychords to monitor. + * Reading /dev/keychord returns the id of a keychord when the + * keychord combination is pressed. A keychord is signalled when + * all of the keys in the keycode list are in the pressed state. + * The order in which the keys are pressed does not matter. + * The keychord will not be signalled if keys not in the keycode + * list are pressed. + * Keychords will not be signalled on key release events. + */ +struct input_keychord { + /* should be KEYCHORD_VERSION */ + __u16 version; + /* + * client specified ID, returned from read() + * when this keychord is pressed. + */ + __u16 id; + + /* number of keycodes in this keychord */ + __u16 count; + + /* variable length array of keycodes */ + __u16 keycodes[]; +}; + +#endif /* _UAPI_LINUX_KEYCHORD_H_ */ -- GitLab From e03e4e2a3f0b67017df789b39af4926f84a40c3c Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 7 May 2014 16:52:10 -0700 Subject: [PATCH 0603/1442] ANDROID: input: add keycombo, a general key combo driver. Keycombo lets you provide a key up and key down function, and an optional time delay for key down. The driver will call the key down function after the specified key combo has been held for the speicified time delay. After you release the combo, if the key down has happened, it calls key up. Change-Id: I6a9a94e96a8f58fadd908fd1dc7944b9102a089f Signed-off-by: Daniel Rosenberg --- drivers/input/Kconfig | 9 ++ drivers/input/Makefile | 1 + drivers/input/keycombo.c | 261 +++++++++++++++++++++++++++++++++++++++ include/linux/keycombo.h | 36 ++++++ 4 files changed, 307 insertions(+) create mode 100644 drivers/input/keycombo.c create mode 100644 include/linux/keycombo.h diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 525ed3a35fe0..938c341c183a 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -196,6 +196,15 @@ config INPUT_KEYRESET To compile this driver as a module, choose M here: the module will be called keyreset. +config INPUT_KEYCOMBO + tristate "Key combo" + depends on INPUT + ---help--- + Say Y here if you want to take action when some keys are pressed; + + To compile this driver as a module, choose M here: the + module will be called keycombo. + comment "Input Device Drivers" source "drivers/input/keyboard/Kconfig" diff --git a/drivers/input/Makefile b/drivers/input/Makefile index f51b8e336ae1..6a3281ca3306 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -27,5 +27,6 @@ obj-$(CONFIG_INPUT_MISC) += misc/ obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o obj-$(CONFIG_INPUT_KEYRESET) += keyreset.o +obj-$(CONFIG_INPUT_KEYCOMBO) += keycombo.o obj-$(CONFIG_RMI4_CORE) += rmi4/ diff --git a/drivers/input/keycombo.c b/drivers/input/keycombo.c new file mode 100644 index 000000000000..2fba451b91d5 --- /dev/null +++ b/drivers/input/keycombo.c @@ -0,0 +1,261 @@ +/* drivers/input/keycombo.c + * + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct keycombo_state { + struct input_handler input_handler; + unsigned long keybit[BITS_TO_LONGS(KEY_CNT)]; + unsigned long upbit[BITS_TO_LONGS(KEY_CNT)]; + unsigned long key[BITS_TO_LONGS(KEY_CNT)]; + spinlock_t lock; + struct workqueue_struct *wq; + int key_down_target; + int key_down; + int key_up; + struct delayed_work key_down_work; + int delay; + struct work_struct key_up_work; + void (*key_up_fn)(void *); + void (*key_down_fn)(void *); + void *priv; + int key_is_down; + struct wakeup_source combo_held_wake_source; + struct wakeup_source combo_up_wake_source; +}; + +static void do_key_down(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct keycombo_state *state = container_of(dwork, + struct keycombo_state, key_down_work); + if (state->key_down_fn) + state->key_down_fn(state->priv); +} + +static void do_key_up(struct work_struct *work) +{ + struct keycombo_state *state = container_of(work, struct keycombo_state, + key_up_work); + if (state->key_up_fn) + state->key_up_fn(state->priv); + __pm_relax(&state->combo_up_wake_source); +} + +static void keycombo_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + unsigned long flags; + struct keycombo_state *state = handle->private; + + if (type != EV_KEY) + return; + + if (code >= KEY_MAX) + return; + + if (!test_bit(code, state->keybit)) + return; + + spin_lock_irqsave(&state->lock, flags); + if (!test_bit(code, state->key) == !value) + goto done; + __change_bit(code, state->key); + if (test_bit(code, state->upbit)) { + if (value) + state->key_up++; + else + state->key_up--; + } else { + if (value) + state->key_down++; + else + state->key_down--; + } + if (state->key_down == state->key_down_target && state->key_up == 0) { + __pm_stay_awake(&state->combo_held_wake_source); + state->key_is_down = 1; + if (queue_delayed_work(state->wq, &state->key_down_work, + state->delay)) + pr_debug("Key down work already queued!"); + } else if (state->key_is_down) { + if (!cancel_delayed_work(&state->key_down_work)) { + __pm_stay_awake(&state->combo_up_wake_source); + queue_work(state->wq, &state->key_up_work); + } + __pm_relax(&state->combo_held_wake_source); + state->key_is_down = 0; + } +done: + spin_unlock_irqrestore(&state->lock, flags); +} + +static int keycombo_connect(struct input_handler *handler, + struct input_dev *dev, + const struct input_device_id *id) +{ + int i; + int ret; + struct input_handle *handle; + struct keycombo_state *state = + container_of(handler, struct keycombo_state, input_handler); + for (i = 0; i < KEY_MAX; i++) { + if (test_bit(i, state->keybit) && test_bit(i, dev->keybit)) + break; + } + if (i == KEY_MAX) + return -ENODEV; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = KEYCOMBO_NAME; + handle->private = state; + + ret = input_register_handle(handle); + if (ret) + goto err_input_register_handle; + + ret = input_open_device(handle); + if (ret) + goto err_input_open_device; + + return 0; + +err_input_open_device: + input_unregister_handle(handle); +err_input_register_handle: + kfree(handle); + return ret; +} + +static void keycombo_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} + +static const struct input_device_id keycombo_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_KEY) }, + }, + { }, +}; +MODULE_DEVICE_TABLE(input, keycombo_ids); + +static int keycombo_probe(struct platform_device *pdev) +{ + int ret; + int key, *keyp; + struct keycombo_state *state; + struct keycombo_platform_data *pdata = pdev->dev.platform_data; + + if (!pdata) + return -EINVAL; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + spin_lock_init(&state->lock); + keyp = pdata->keys_down; + while ((key = *keyp++)) { + if (key >= KEY_MAX) + continue; + state->key_down_target++; + __set_bit(key, state->keybit); + } + if (pdata->keys_up) { + keyp = pdata->keys_up; + while ((key = *keyp++)) { + if (key >= KEY_MAX) + continue; + __set_bit(key, state->keybit); + __set_bit(key, state->upbit); + } + } + + state->wq = alloc_ordered_workqueue("keycombo", 0); + if (!state->wq) + return -ENOMEM; + + state->priv = pdata->priv; + + if (pdata->key_down_fn) + state->key_down_fn = pdata->key_down_fn; + INIT_DELAYED_WORK(&state->key_down_work, do_key_down); + + if (pdata->key_up_fn) + state->key_up_fn = pdata->key_up_fn; + INIT_WORK(&state->key_up_work, do_key_up); + + wakeup_source_init(&state->combo_held_wake_source, "key combo"); + wakeup_source_init(&state->combo_up_wake_source, "key combo up"); + state->delay = msecs_to_jiffies(pdata->key_down_delay); + + state->input_handler.event = keycombo_event; + state->input_handler.connect = keycombo_connect; + state->input_handler.disconnect = keycombo_disconnect; + state->input_handler.name = KEYCOMBO_NAME; + state->input_handler.id_table = keycombo_ids; + ret = input_register_handler(&state->input_handler); + if (ret) { + kfree(state); + return ret; + } + platform_set_drvdata(pdev, state); + return 0; +} + +int keycombo_remove(struct platform_device *pdev) +{ + struct keycombo_state *state = platform_get_drvdata(pdev); + input_unregister_handler(&state->input_handler); + destroy_workqueue(state->wq); + kfree(state); + return 0; +} + + +struct platform_driver keycombo_driver = { + .driver.name = KEYCOMBO_NAME, + .probe = keycombo_probe, + .remove = keycombo_remove, +}; + +static int __init keycombo_init(void) +{ + return platform_driver_register(&keycombo_driver); +} + +static void __exit keycombo_exit(void) +{ + return platform_driver_unregister(&keycombo_driver); +} + +module_init(keycombo_init); +module_exit(keycombo_exit); diff --git a/include/linux/keycombo.h b/include/linux/keycombo.h new file mode 100644 index 000000000000..c6db2626b0d3 --- /dev/null +++ b/include/linux/keycombo.h @@ -0,0 +1,36 @@ +/* + * include/linux/keycombo.h - platform data structure for keycombo driver + * + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_KEYCOMBO_H +#define _LINUX_KEYCOMBO_H + +#define KEYCOMBO_NAME "keycombo" + +/* + * if key_down_fn and key_up_fn are both present, you are guaranteed that + * key_down_fn will return before key_up_fn is called, and that key_up_fn + * is called iff key_down_fn is called. + */ +struct keycombo_platform_data { + void (*key_down_fn)(void *); + void (*key_up_fn)(void *); + void *priv; + int key_down_delay; /* Time in ms */ + int *keys_up; + int keys_down[]; /* 0 terminated */ +}; + +#endif /* _LINUX_KEYCOMBO_H */ -- GitLab From 6e92697b873cfe11dea499fc572a72d78086b69a Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 7 May 2014 14:17:47 -0700 Subject: [PATCH 0604/1442] ANDROID: input: Changed keyreset to act as a wrapper for keycombo. keyreset now registers a keycombo driver that acts as the old keyreset driver acted. Change-Id: I08f5279e3a33b267571b699697f9f54508868983 Signed-off-by: Daniel Rosenberg --- drivers/input/Kconfig | 1 + drivers/input/keyreset.c | 206 +++++++++++---------------------------- include/linux/keyreset.h | 3 +- 3 files changed, 58 insertions(+), 152 deletions(-) diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 938c341c183a..5d954cc8cab5 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -190,6 +190,7 @@ config INPUT_APMPOWER config INPUT_KEYRESET tristate "Reset key" depends on INPUT + select INPUT_KEYCOMBO ---help--- Say Y here if you want to reboot when some keys are pressed; diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c index 36208fe0baae..eaaccde82210 100644 --- a/drivers/input/keyreset.c +++ b/drivers/input/keyreset.c @@ -1,6 +1,6 @@ /* drivers/input/keyreset.c * - * Copyright (C) 2008 Google, Inc. + * Copyright (C) 2014 Google, Inc. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -21,200 +21,104 @@ #include #include #include - +#include struct keyreset_state { - struct input_handler input_handler; - unsigned long keybit[BITS_TO_LONGS(KEY_CNT)]; - unsigned long upbit[BITS_TO_LONGS(KEY_CNT)]; - unsigned long key[BITS_TO_LONGS(KEY_CNT)]; - spinlock_t lock; - int key_down_target; - int key_down; - int key_up; - int restart_disabled; + int restart_requested; int (*reset_fn)(void); + struct platform_device *pdev_child; }; -int restart_requested; -static void deferred_restart(struct work_struct *dummy) +static void do_restart(void) { - restart_requested = 2; sys_sync(); - restart_requested = 3; kernel_restart(NULL); } -static DECLARE_WORK(restart_work, deferred_restart); -static void keyreset_event(struct input_handle *handle, unsigned int type, - unsigned int code, int value) +static void do_reset_fn(void *priv) { - unsigned long flags; - struct keyreset_state *state = handle->private; - - if (type != EV_KEY) - return; - - if (code >= KEY_MAX) - return; - - if (!test_bit(code, state->keybit)) - return; - - spin_lock_irqsave(&state->lock, flags); - if (!test_bit(code, state->key) == !value) - goto done; - __change_bit(code, state->key); - if (test_bit(code, state->upbit)) { - if (value) { - state->restart_disabled = 1; - state->key_up++; - } else - state->key_up--; + struct keyreset_state *state = priv; + if (state->restart_requested) + panic("keyboard reset failed, %d", state->restart_requested); + if (state->reset_fn) { + state->restart_requested = state->reset_fn(); } else { - if (value) - state->key_down++; - else - state->key_down--; + pr_info("keyboard reset\n"); + do_restart(); + state->restart_requested = 1; } - if (state->key_down == 0 && state->key_up == 0) - state->restart_disabled = 0; - - pr_debug("reset key changed %d %d new state %d-%d-%d\n", code, value, - state->key_down, state->key_up, state->restart_disabled); - - if (value && !state->restart_disabled && - state->key_down == state->key_down_target) { - state->restart_disabled = 1; - if (restart_requested) - panic("keyboard reset failed, %d", restart_requested); - if (state->reset_fn) { - restart_requested = state->reset_fn(); - } else { - pr_info("keyboard reset\n"); - schedule_work(&restart_work); - restart_requested = 1; - } - } -done: - spin_unlock_irqrestore(&state->lock, flags); } -static int keyreset_connect(struct input_handler *handler, - struct input_dev *dev, - const struct input_device_id *id) -{ - int i; - int ret; - struct input_handle *handle; - struct keyreset_state *state = - container_of(handler, struct keyreset_state, input_handler); - - for (i = 0; i < KEY_MAX; i++) { - if (test_bit(i, state->keybit) && test_bit(i, dev->keybit)) - break; - } - if (i == KEY_MAX) - return -ENODEV; - - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->dev = dev; - handle->handler = handler; - handle->name = "keyreset"; - handle->private = state; - - ret = input_register_handle(handle); - if (ret) - goto err_input_register_handle; - - ret = input_open_device(handle); - if (ret) - goto err_input_open_device; - - pr_info("using input dev %s for key reset\n", dev->name); - - return 0; - -err_input_open_device: - input_unregister_handle(handle); -err_input_register_handle: - kfree(handle); - return ret; -} - -static void keyreset_disconnect(struct input_handle *handle) -{ - input_close_device(handle); - input_unregister_handle(handle); - kfree(handle); -} - -static const struct input_device_id keyreset_ids[] = { - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT, - .evbit = { BIT_MASK(EV_KEY) }, - }, - { }, -}; -MODULE_DEVICE_TABLE(input, keyreset_ids); - static int keyreset_probe(struct platform_device *pdev) { - int ret; + int ret = -ENOMEM; + struct keycombo_platform_data *pdata_child; + struct keyreset_platform_data *pdata = pdev->dev.platform_data; + int up_size = 0, down_size = 0, size; int key, *keyp; struct keyreset_state *state; - struct keyreset_platform_data *pdata = pdev->dev.platform_data; if (!pdata) return -EINVAL; - - state = kzalloc(sizeof(*state), GFP_KERNEL); + state = devm_kzalloc(&pdev->dev, sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; - spin_lock_init(&state->lock); + state->pdev_child = platform_device_alloc(KEYCOMBO_NAME, + PLATFORM_DEVID_AUTO); + if (!state->pdev_child) + return -ENOMEM; + state->pdev_child->dev.parent = &pdev->dev; + keyp = pdata->keys_down; while ((key = *keyp++)) { if (key >= KEY_MAX) continue; - state->key_down_target++; - __set_bit(key, state->keybit); + down_size++; } if (pdata->keys_up) { keyp = pdata->keys_up; while ((key = *keyp++)) { if (key >= KEY_MAX) continue; - __set_bit(key, state->keybit); - __set_bit(key, state->upbit); + up_size++; } } - - if (pdata->reset_fn) - state->reset_fn = pdata->reset_fn; - - state->input_handler.event = keyreset_event; - state->input_handler.connect = keyreset_connect; - state->input_handler.disconnect = keyreset_disconnect; - state->input_handler.name = KEYRESET_NAME; - state->input_handler.id_table = keyreset_ids; - ret = input_register_handler(&state->input_handler); - if (ret) { - kfree(state); - return ret; + size = sizeof(struct keycombo_platform_data) + + sizeof(int) * (down_size + 1); + pdata_child = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!pdata_child) + goto error; + memcpy(pdata_child->keys_down, pdata->keys_down, + sizeof(int) * down_size); + if (up_size > 0) { + pdata_child->keys_up = devm_kzalloc(&pdev->dev, up_size + 1, + GFP_KERNEL); + if (!pdata_child->keys_up) + goto error; + memcpy(pdata_child->keys_up, pdata->keys_up, + sizeof(int) * up_size); + if (!pdata_child->keys_up) + goto error; } + state->reset_fn = pdata->reset_fn; + pdata_child->key_down_fn = do_reset_fn; + pdata_child->priv = state; + pdata_child->key_down_delay = pdata->key_down_delay; + ret = platform_device_add_data(state->pdev_child, pdata_child, size); + if (ret) + goto error; platform_set_drvdata(pdev, state); - return 0; + return platform_device_add(state->pdev_child); +error: + platform_device_put(state->pdev_child); + return ret; } int keyreset_remove(struct platform_device *pdev) { struct keyreset_state *state = platform_get_drvdata(pdev); - input_unregister_handler(&state->input_handler); - kfree(state); + platform_device_put(state->pdev_child); return 0; } diff --git a/include/linux/keyreset.h b/include/linux/keyreset.h index a2ac49e5b684..2e34afab65e4 100644 --- a/include/linux/keyreset.h +++ b/include/linux/keyreset.h @@ -1,7 +1,7 @@ /* * include/linux/keyreset.h - platform data structure for resetkeys driver * - * Copyright (C) 2008 Google, Inc. + * Copyright (C) 2014 Google, Inc. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -21,6 +21,7 @@ struct keyreset_platform_data { int (*reset_fn)(void); + int key_down_delay; int *keys_up; int keys_down[]; /* 0 terminated */ }; -- GitLab From 455a52a01718c6b540c470420a590b21be22ca87 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 27 Jun 2014 16:39:35 -0700 Subject: [PATCH 0605/1442] ANDROID: input: Made keyreset more robust Switched do_restart to run in a seperate workqueue to handle cases where kernel_restart hangs. Change-Id: I1ecd61f8d0859f1a86d37c692351d644b5db9c69 Signed-off-by: Daniel Rosenberg --- drivers/input/keyreset.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c index eaaccde82210..7fbf7247e65f 100644 --- a/drivers/input/keyreset.c +++ b/drivers/input/keyreset.c @@ -27,9 +27,10 @@ struct keyreset_state { int restart_requested; int (*reset_fn)(void); struct platform_device *pdev_child; + struct work_struct restart_work; }; -static void do_restart(void) +static void do_restart(struct work_struct *unused) { sys_sync(); kernel_restart(NULL); @@ -44,7 +45,7 @@ static void do_reset_fn(void *priv) state->restart_requested = state->reset_fn(); } else { pr_info("keyboard reset\n"); - do_restart(); + schedule_work(&state->restart_work); state->restart_requested = 1; } } @@ -69,6 +70,7 @@ static int keyreset_probe(struct platform_device *pdev) if (!state->pdev_child) return -ENOMEM; state->pdev_child->dev.parent = &pdev->dev; + INIT_WORK(&state->restart_work, do_restart); keyp = pdata->keys_down; while ((key = *keyp++)) { -- GitLab From bffb6b8c35a847348cf5d2ba3b21ca58b71b1b39 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Tue, 19 Jun 2012 21:06:47 -0700 Subject: [PATCH 0606/1442] ANDROID: gpio_input: convert from wakelocks to wakeup sources And add device names to wakeup source names Change-Id: Ia5f2723319a2e749f00d6ec7d846edff6af6d5c2 Signed-off-by: Todd Poynor --- drivers/input/misc/gpio_input.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/input/misc/gpio_input.c b/drivers/input/misc/gpio_input.c index 6a0c31510968..eefd02725aff 100644 --- a/drivers/input/misc/gpio_input.c +++ b/drivers/input/misc/gpio_input.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include enum { DEBOUNCE_UNSTABLE = BIT(0), /* Got irq, while debouncing */ @@ -45,7 +45,7 @@ struct gpio_input_state { int use_irq; int debounce_count; spinlock_t irq_lock; - struct wake_lock wake_lock; + struct wakeup_source *ws; struct gpio_key_state key_state[0]; }; @@ -153,7 +153,7 @@ static enum hrtimer_restart gpio_event_input_timer_func(struct hrtimer *timer) else if (!ds->use_irq) hrtimer_start(timer, ds->info->poll_time, HRTIMER_MODE_REL); else - wake_unlock(&ds->wake_lock); + __pm_relax(ds->ws); spin_unlock_irqrestore(&ds->irq_lock, irqflags); @@ -179,7 +179,7 @@ static irqreturn_t gpio_event_input_irq_handler(int irq, void *dev_id) if (ks->debounce & DEBOUNCE_WAIT_IRQ) { ks->debounce = DEBOUNCE_UNKNOWN; if (ds->debounce_count++ == 0) { - wake_lock(&ds->wake_lock); + __pm_stay_awake(ds->ws); hrtimer_start( &ds->timer, ds->info->debounce_time, HRTIMER_MODE_REL); @@ -262,6 +262,7 @@ int gpio_event_input_func(struct gpio_event_input_devs *input_devs, unsigned long irqflags; struct gpio_event_input_info *di; struct gpio_input_state *ds = *data; + char *wlname; di = container_of(info, struct gpio_event_input_info, info); @@ -297,7 +298,19 @@ int gpio_event_input_func(struct gpio_event_input_devs *input_devs, ds->debounce_count = di->keymap_size; ds->input_devs = input_devs; ds->info = di; - wake_lock_init(&ds->wake_lock, WAKE_LOCK_SUSPEND, "gpio_input"); + wlname = kasprintf(GFP_KERNEL, "gpio_input:%s%s", + input_devs->dev[0]->name, + (input_devs->count > 1) ? "..." : ""); + + ds->ws = wakeup_source_register(wlname); + kfree(wlname); + if (!ds->ws) { + ret = -ENOMEM; + pr_err("gpio_event_input_func: " + "Failed to allocate wakeup source\n"); + goto err_ws_failed; + } + spin_lock_init(&ds->irq_lock); for (i = 0; i < di->keymap_size; i++) { @@ -369,7 +382,8 @@ int gpio_event_input_func(struct gpio_event_input_devs *input_devs, ; } err_bad_keymap: - wake_lock_destroy(&ds->wake_lock); + wakeup_source_unregister(ds->ws); +err_ws_failed: kfree(ds); err_ds_alloc_failed: return ret; -- GitLab From 402037818bd2fe63daaf8354e1b4f70089a9cfa2 Mon Sep 17 00:00:00 2001 From: Dima Zavin Date: Thu, 4 Dec 2008 12:19:57 -0800 Subject: [PATCH 0607/1442] ANDROID: mtd: nand: Allow NAND chip ids to be included standalone. Lets non-standard NAND drivers take advantage of known NAND chip information. Change-Id: I87e2fcb40b07b2ec91e102f1fa7f419a4b4af0a3 Signed-off-by: Dima Zavin --- drivers/mtd/nand/Kconfig | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 7b7a887b4709..bfa587d65aa2 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -1,3 +1,10 @@ +config MTD_NAND_IDS + tristate "Include chip ids for known NAND devices." + depends on MTD + help + Useful for NAND drivers that do not use the NAND subsystem but + still like to take advantage of the known chip information. + config MTD_NAND_ECC tristate @@ -109,9 +116,6 @@ config MTD_NAND_OMAP_BCH config MTD_NAND_OMAP_BCH_BUILD def_tristate MTD_NAND_OMAP2 && MTD_NAND_OMAP_BCH -config MTD_NAND_IDS - tristate - config MTD_NAND_RICOH tristate "Ricoh xD card reader" default n -- GitLab From 6bb7b6048012d5b395cc9ded8512ac9f06d438f1 Mon Sep 17 00:00:00 2001 From: San Mehat Date: Tue, 11 Nov 2008 09:35:36 -0800 Subject: [PATCH 0608/1442] ANDROID: mmc: sd: Add new CONFIG_MMC_PARANOID_SD_INIT for enabling retries during SD detection Change-Id: I4e6b9d9a3600d7efbee1d8379e45db11c57827f2 Signed-off-by: San Mehat --- drivers/mmc/core/Kconfig | 8 ++++++++ drivers/mmc/core/sd.c | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig index 250f223aaa80..c4ec6c852a8e 100644 --- a/drivers/mmc/core/Kconfig +++ b/drivers/mmc/core/Kconfig @@ -22,3 +22,11 @@ config PWRSEQ_SIMPLE This driver can also be built as a module. If so, the module will be called pwrseq_simple. + +config MMC_PARANOID_SD_INIT + bool "Enable paranoid SD card initialization (EXPERIMENTAL)" + help + If you say Y here, the MMC layer will be extra paranoid + about re-trying SD init requests. This can be a useful + work-around for buggy controllers and hardware. Enable + if you are experiencing issues with SD detection. diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 73c762a28dfe..07f97999d1c0 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -839,6 +839,9 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, bool reinit) { int err; +#ifdef CONFIG_MMC_PARANOID_SD_INIT + int retries; +#endif if (!reinit) { /* @@ -865,7 +868,26 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card, /* * Fetch switch information from card. */ +#ifdef CONFIG_MMC_PARANOID_SD_INIT + for (retries = 1; retries <= 3; retries++) { + err = mmc_read_switch(card); + if (!err) { + if (retries > 1) { + printk(KERN_WARNING + "%s: recovered\n", + mmc_hostname(host)); + } + break; + } else { + printk(KERN_WARNING + "%s: read switch failed (attempt %d)\n", + mmc_hostname(host), retries); + } + } +#else err = mmc_read_switch(card); +#endif + if (err) return err; } -- GitLab From 3d1bf5252a94654b60dc779c432dec25b5418df6 Mon Sep 17 00:00:00 2001 From: San Mehat Date: Mon, 1 Dec 2008 08:52:34 -0800 Subject: [PATCH 0609/1442] ANDROID: mmc: sd: When resuming, try a little harder to init the card Signed-off-by: San Mehat --- drivers/mmc/core/sd.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 07f97999d1c0..8538225d81ca 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -1157,6 +1157,9 @@ static int mmc_sd_suspend(struct mmc_host *host) static int _mmc_sd_resume(struct mmc_host *host) { int err = 0; +#ifdef CONFIG_MMC_PARANOID_SD_INIT + int retries; +#endif BUG_ON(!host); BUG_ON(!host->card); @@ -1167,7 +1170,23 @@ static int _mmc_sd_resume(struct mmc_host *host) goto out; mmc_power_up(host, host->card->ocr); +#ifdef CONFIG_MMC_PARANOID_SD_INIT + retries = 5; + while (retries) { + err = mmc_sd_init_card(host, host->card->ocr, host->card); + + if (err) { + printk(KERN_ERR "%s: Re-init card rc = %d (retries = %d)\n", + mmc_hostname(host), err, retries); + mdelay(5); + retries--; + continue; + } + break; + } +#else err = mmc_sd_init_card(host, host->card->ocr, host->card); +#endif mmc_card_clr_suspended(host->card); out: -- GitLab From 0a3f5620b56d1d21b070697a7f34a092da635d08 Mon Sep 17 00:00:00 2001 From: San Mehat Date: Thu, 4 Dec 2008 11:18:00 -0800 Subject: [PATCH 0610/1442] ANDROID: mmc: sd: Add retries in re-detection Signed-off-by: San Mehat mmc: sd: Remove debugging printk Signed-off-by: Dima Zavin --- drivers/mmc/core/sd.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 8538225d81ca..fa7ecd16e786 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -1085,7 +1085,10 @@ static int mmc_sd_alive(struct mmc_host *host) */ static void mmc_sd_detect(struct mmc_host *host) { - int err; + int err = 0; +#ifdef CONFIG_MMC_PARANOID_SD_INIT + int retries = 5; +#endif BUG_ON(!host); BUG_ON(!host->card); @@ -1095,7 +1098,23 @@ static void mmc_sd_detect(struct mmc_host *host) /* * Just check if our card has been removed. */ +#ifdef CONFIG_MMC_PARANOID_SD_INIT + while(retries) { + err = mmc_send_status(host->card, NULL); + if (err) { + retries--; + udelay(5); + continue; + } + break; + } + if (!retries) { + printk(KERN_ERR "%s(%s): Unable to re-detect card (%d)\n", + __func__, mmc_hostname(host), err); + } +#else err = _mmc_detect_card_removed(host); +#endif mmc_put_card(host->card); @@ -1261,6 +1280,9 @@ int mmc_attach_sd(struct mmc_host *host) { int err; u32 ocr, rocr; +#ifdef CONFIG_MMC_PARANOID_SD_INIT + int retries; +#endif BUG_ON(!host); WARN_ON(!host->claimed); @@ -1297,9 +1319,27 @@ int mmc_attach_sd(struct mmc_host *host) /* * Detect and init the card. */ +#ifdef CONFIG_MMC_PARANOID_SD_INIT + retries = 5; + while (retries) { + err = mmc_sd_init_card(host, rocr, NULL); + if (err) { + retries--; + continue; + } + break; + } + + if (!retries) { + printk(KERN_ERR "%s: mmc_sd_init_card() failure (err = %d)\n", + mmc_hostname(host), err); + goto err; + } +#else err = mmc_sd_init_card(host, rocr, NULL); if (err) goto err; +#endif mmc_release_host(host); err = mmc_add_card(host->card); -- GitLab From 2b6be3a2b5a0e513dcc59276cbe49eb0b664a76a Mon Sep 17 00:00:00 2001 From: San Mehat Date: Mon, 14 Apr 2008 15:22:49 -0700 Subject: [PATCH 0611/1442] ANDROID: mmc: Add concept of an 'embedded' SDIO device. This is required to support chips which use SDIO for signaling/ communication but do not implement the various card enumeration registers as required for full SD / SDIO cards. mmc: sdio: Fix bug where we're freeing the CIS tables we never allocated when using EMBEDDED_SDIO mmc: Add max_blksize to embedded SDIO data Change-Id: Ibff2e3e991e5522f55ec8c6edc25ed09f2553736 Signed-off-by: San Mehat --- drivers/mmc/core/Kconfig | 9 +++++ drivers/mmc/core/core.c | 16 ++++++++ drivers/mmc/core/sdio.c | 74 +++++++++++++++++++++++++++-------- drivers/mmc/core/sdio_bus.c | 13 +++++- include/linux/amba/mmci.h | 10 +++++ include/linux/mmc/host.h | 17 ++++++++ include/linux/mmc/sdio_func.h | 8 ++++ 7 files changed, 130 insertions(+), 17 deletions(-) diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig index c4ec6c852a8e..4ec3a4febb1a 100644 --- a/drivers/mmc/core/Kconfig +++ b/drivers/mmc/core/Kconfig @@ -23,8 +23,17 @@ config PWRSEQ_SIMPLE This driver can also be built as a module. If so, the module will be called pwrseq_simple. +config MMC_EMBEDDED_SDIO + boolean "MMC embedded SDIO device support (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + If you say Y here, support will be added for embedded SDIO + devices which do not contain the necessary enumeration + support in hardware to be properly detected. + config MMC_PARANOID_SD_INIT bool "Enable paranoid SD card initialization (EXPERIMENTAL)" + depends on EXPERIMENTAL help If you say Y here, the MMC layer will be extra paranoid about re-trying SD init requests. This can be a useful diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 2553d903a82b..3305824f6e76 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -3026,6 +3026,22 @@ void mmc_init_context_info(struct mmc_host *host) init_waitqueue_head(&host->context_info.wait); } +#ifdef CONFIG_MMC_EMBEDDED_SDIO +void mmc_set_embedded_sdio_data(struct mmc_host *host, + struct sdio_cis *cis, + struct sdio_cccr *cccr, + struct sdio_embedded_func *funcs, + int num_funcs) +{ + host->embedded_sdio_data.cis = cis; + host->embedded_sdio_data.cccr = cccr; + host->embedded_sdio_data.funcs = funcs; + host->embedded_sdio_data.num_funcs = num_funcs; +} + +EXPORT_SYMBOL(mmc_set_embedded_sdio_data); +#endif + static int __init mmc_init(void) { int ret; diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index bd44ba8116d1..c8a5e7190903 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -28,6 +28,10 @@ #include "sdio_ops.h" #include "sdio_cis.h" +#ifdef CONFIG_MMC_EMBEDDED_SDIO +#include +#endif + static int sdio_read_fbr(struct sdio_func *func) { int ret; @@ -697,19 +701,35 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto finish; } - /* - * Read the common registers. - */ - err = sdio_read_cccr(card, ocr); - if (err) - goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + if (host->embedded_sdio_data.cccr) + memcpy(&card->cccr, host->embedded_sdio_data.cccr, sizeof(struct sdio_cccr)); + else { +#endif + /* + * Read the common registers. + */ + err = sdio_read_cccr(card, ocr); + if (err) + goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + } +#endif - /* - * Read the common CIS tuples. - */ - err = sdio_read_common_cis(card); - if (err) - goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + if (host->embedded_sdio_data.cis) + memcpy(&card->cis, host->embedded_sdio_data.cis, sizeof(struct sdio_cis)); + else { +#endif + /* + * Read the common CIS tuples. + */ + err = sdio_read_common_cis(card); + if (err) + goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + } +#endif if (oldcard) { int same = (card->cis.vendor == oldcard->cis.vendor && @@ -1118,14 +1138,36 @@ int mmc_attach_sdio(struct mmc_host *host) funcs = (ocr & 0x70000000) >> 28; card->sdio_funcs = 0; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + if (host->embedded_sdio_data.funcs) + card->sdio_funcs = funcs = host->embedded_sdio_data.num_funcs; +#endif + /* * Initialize (but don't add) all present functions. */ for (i = 0; i < funcs; i++, card->sdio_funcs++) { - err = sdio_init_func(host->card, i + 1); - if (err) - goto remove; - +#ifdef CONFIG_MMC_EMBEDDED_SDIO + if (host->embedded_sdio_data.funcs) { + struct sdio_func *tmp; + + tmp = sdio_alloc_func(host->card); + if (IS_ERR(tmp)) + goto remove; + tmp->num = (i + 1); + card->sdio_func[i] = tmp; + tmp->class = host->embedded_sdio_data.funcs[i].f_class; + tmp->max_blksize = host->embedded_sdio_data.funcs[i].f_maxblksize; + tmp->vendor = card->cis.vendor; + tmp->device = card->cis.device; + } else { +#endif + err = sdio_init_func(host->card, i + 1); + if (err) + goto remove; +#ifdef CONFIG_MMC_EMBEDDED_SDIO + } +#endif /* * Enable Runtime PM for this func (if supported) */ diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 86f5b3223aae..1499d5333c79 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -28,6 +28,10 @@ #include "sdio_cis.h" #include "sdio_bus.h" +#ifdef CONFIG_MMC_EMBEDDED_SDIO +#include +#endif + #define to_sdio_driver(d) container_of(d, struct sdio_driver, drv) /* show configuration fields */ @@ -263,7 +267,14 @@ static void sdio_release_func(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); - sdio_free_func_cis(func); +#ifdef CONFIG_MMC_EMBEDDED_SDIO + /* + * If this device is embedded then we never allocated + * cis tables for this func + */ + if (!func->card->host->embedded_sdio_data.funcs) +#endif + sdio_free_func_cis(func); kfree(func->info); diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index 8c98113069ce..eff56cb0016a 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h @@ -5,6 +5,15 @@ #define AMBA_MMCI_H #include +#include +#include + +struct embedded_sdio_data { + struct sdio_cis cis; + struct sdio_cccr cccr; + struct sdio_embedded_func *funcs; + int num_funcs; +}; /** * struct mmci_platform_data - platform configuration for the MMCI @@ -31,6 +40,7 @@ struct mmci_platform_data { int gpio_wp; int gpio_cd; bool cd_invert; + struct embedded_sdio_data *embedded_sdio; }; #endif diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0b2439441cc8..6ddd1402baab 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -397,6 +397,15 @@ struct mmc_host { int dsr_req; /* DSR value is valid */ u32 dsr; /* optional driver stage (DSR) value */ +#ifdef CONFIG_MMC_EMBEDDED_SDIO + struct { + struct sdio_cis *cis; + struct sdio_cccr *cccr; + struct sdio_embedded_func *funcs; + int num_funcs; + } embedded_sdio_data; +#endif + unsigned long private[0] ____cacheline_aligned; }; @@ -406,6 +415,14 @@ void mmc_remove_host(struct mmc_host *); void mmc_free_host(struct mmc_host *); int mmc_of_parse(struct mmc_host *host); +#ifdef CONFIG_MMC_EMBEDDED_SDIO +extern void mmc_set_embedded_sdio_data(struct mmc_host *host, + struct sdio_cis *cis, + struct sdio_cccr *cccr, + struct sdio_embedded_func *funcs, + int num_funcs); +#endif + static inline void *mmc_priv(struct mmc_host *host) { return (void *)host->private; diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index aab032a6ae61..2e5e4baaf5ac 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -22,6 +22,14 @@ struct sdio_func; typedef void (sdio_irq_handler_t)(struct sdio_func *); +/* + * Structure used to hold embedded SDIO device data from platform layer + */ +struct sdio_embedded_func { + uint8_t f_class; + uint32_t f_maxblksize; +}; + /* * SDIO function CIS tuple (unknown to the core) */ -- GitLab From 56d0106777dbf47ba656920fb2f5574f3f58c371 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 11 Nov 2008 11:22:38 -0800 Subject: [PATCH 0612/1442] ANDROID: mmc: Add sdio_readb_ext() function Change-Id: I9b410c8a13724795b23764012fd3be8f53747299 --- drivers/mmc/core/sdio_io.c | 33 +++++++++++++++++++++++++++++++++ include/linux/mmc/sdio_func.h | 2 ++ 2 files changed, 35 insertions(+) diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index 406e5f037e32..3734cba53dbb 100644 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -389,6 +389,39 @@ u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret) } EXPORT_SYMBOL_GPL(sdio_readb); +/** + * sdio_readb_ext - read a single byte from a SDIO function + * @func: SDIO function to access + * @addr: address to read + * @err_ret: optional status value from transfer + * @in: value to add to argument + * + * Reads a single byte from the address space of a given SDIO + * function. If there is a problem reading the address, 0xff + * is returned and @err_ret will contain the error code. + */ +unsigned char sdio_readb_ext(struct sdio_func *func, unsigned int addr, + int *err_ret, unsigned in) +{ + int ret; + unsigned char val; + + BUG_ON(!func); + + if (err_ret) + *err_ret = 0; + + ret = mmc_io_rw_direct(func->card, 0, func->num, addr, (u8)in, &val); + if (ret) { + if (err_ret) + *err_ret = ret; + return 0xFF; + } + + return val; +} +EXPORT_SYMBOL_GPL(sdio_readb_ext); + /** * sdio_writeb - write a single byte to a SDIO function * @func: SDIO function to access diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 2e5e4baaf5ac..d0a69e71b8ab 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -136,6 +136,8 @@ extern int sdio_release_irq(struct sdio_func *func); extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz); extern u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret); +extern u8 sdio_readb_ext(struct sdio_func *func, unsigned int addr, int *err_ret, + unsigned in); extern u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret); extern u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret); -- GitLab From 938571b538f4e5938bc20cd8dc0840e434a3699f Mon Sep 17 00:00:00 2001 From: San Mehat Date: Thu, 15 May 2008 09:15:37 -0700 Subject: [PATCH 0613/1442] ANDROID: mmc: Add new API call 'sdio_reset_comm' for resetting communication with an SDIO device Signed-off-by: San Mehat --- drivers/mmc/core/sdio.c | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index c8a5e7190903..5c51e0313752 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1215,3 +1215,60 @@ int mmc_attach_sdio(struct mmc_host *host) return err; } +int sdio_reset_comm(struct mmc_card *card) +{ + struct mmc_host *host = card->host; + u32 ocr; + int err; + + printk("%s():\n", __func__); + mmc_go_idle(host); + + mmc_set_clock(host, host->f_min); + + err = mmc_send_io_op_cond(host, 0, &ocr); + if (err) + goto err; + + host->ocr = mmc_select_voltage(host, ocr); + if (!host->ocr) { + err = -EINVAL; + goto err; + } + + err = mmc_send_io_op_cond(host, host->ocr, &ocr); + if (err) + goto err; + + if (mmc_host_is_spi(host)) { + err = mmc_spi_set_crc(host, use_spi_crc); + if (err) + goto err; + } + + if (!mmc_host_is_spi(host)) { + err = mmc_send_relative_addr(host, &card->rca); + if (err) + goto err; + mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL); + } + if (!mmc_host_is_spi(host)) { + err = mmc_select_card(card); + if (err) + goto err; + } + + mmc_set_clock(host, card->cis.max_dtr); + err = sdio_enable_wide(card); + if (err) + goto err; + + return 0; + err: + printk("%s: Error resetting SDIO communications (%d)\n", + mmc_hostname(host), err); + return err; +} +EXPORT_SYMBOL(sdio_reset_comm); + + -- GitLab From 7aa92ea46b82378febde5d5faaa5330301c9d802 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 29 Jul 2009 10:22:03 -0700 Subject: [PATCH 0614/1442] ANDROID: mmc: sdio: Claim host in sdio_reset_comm() Signed-off-by: Dmitry Shmidt --- drivers/mmc/core/sdio.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 5c51e0313752..525868dd6068 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -10,6 +10,7 @@ */ #include +#include #include #include @@ -1222,6 +1223,8 @@ int sdio_reset_comm(struct mmc_card *card) int err; printk("%s():\n", __func__); + mmc_claim_host(host); + mmc_go_idle(host); mmc_set_clock(host, host->f_min); @@ -1262,13 +1265,12 @@ int sdio_reset_comm(struct mmc_card *card) err = sdio_enable_wide(card); if (err) goto err; - + mmc_release_host(host); return 0; - err: +err: printk("%s: Error resetting SDIO communications (%d)\n", mmc_hostname(host), err); + mmc_release_host(host); return err; } EXPORT_SYMBOL(sdio_reset_comm); - - -- GitLab From 48ed6faf7101544e9bbc999a09b865e6ed7eda92 Mon Sep 17 00:00:00 2001 From: Daniel Chen Date: Wed, 9 Dec 2009 09:45:36 -0800 Subject: [PATCH 0615/1442] ANDROID: mmc: sdio: Add high speed support to sdio_reset_comm() Signed-off-by: San Mehat --- drivers/mmc/core/sdio.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 525868dd6068..462356f7d9c9 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1261,7 +1261,28 @@ int sdio_reset_comm(struct mmc_card *card) goto err; } - mmc_set_clock(host, card->cis.max_dtr); + /* + * Switch to high-speed (if supported). + */ + err = sdio_enable_hs(card); + if (err) + goto err; + + /* + * Change to the card's maximum speed. + */ + if (mmc_card_highspeed(card)) { + /* + * The SDIO specification doesn't mention how + * the CIS transfer speed register relates to + * high-speed, but it seems that 50 MHz is + * mandatory. + */ + mmc_set_clock(host, 50000000); + } else { + mmc_set_clock(host, card->cis.max_dtr); + } + err = sdio_enable_wide(card); if (err) goto err; -- GitLab From 137aeca2f2041e6e3c08ddbff650efc9a562817a Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 6 Oct 2010 17:25:02 -0700 Subject: [PATCH 0616/1442] ANDROID: mmc: sdio: Fix enable_hs and enable_wide in sdio_reset_comm() Signed-off-by: Dmitry Shmidt --- drivers/mmc/core/sdio.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 462356f7d9c9..64909e8479a8 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1246,7 +1246,7 @@ int sdio_reset_comm(struct mmc_card *card) if (mmc_host_is_spi(host)) { err = mmc_spi_set_crc(host, use_spi_crc); if (err) - goto err; + goto err; } if (!mmc_host_is_spi(host)) { @@ -1265,27 +1265,22 @@ int sdio_reset_comm(struct mmc_card *card) * Switch to high-speed (if supported). */ err = sdio_enable_hs(card); - if (err) + if (err > 0) + mmc_sd_go_highspeed(card); + else if (err) goto err; /* * Change to the card's maximum speed. */ - if (mmc_card_highspeed(card)) { - /* - * The SDIO specification doesn't mention how - * the CIS transfer speed register relates to - * high-speed, but it seems that 50 MHz is - * mandatory. - */ - mmc_set_clock(host, 50000000); - } else { - mmc_set_clock(host, card->cis.max_dtr); - } + mmc_set_clock(host, mmc_sdio_get_max_clock(card)); - err = sdio_enable_wide(card); - if (err) + err = sdio_enable_4bit_bus(card); + if (err > 0) + mmc_set_bus_width(host, MMC_BUS_WIDTH_4); + else if (err) goto err; + mmc_release_host(host); return 0; err: -- GitLab From 5b7f909e83b81198189640217bfa5a161c1827e7 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 19 Mar 2014 12:46:49 -0700 Subject: [PATCH 0617/1442] ANDROID: mmc: sdio: fix sdio_reset_comm() voltage selection Change-Id: I2fa35ee9291c4c60e55fc11d923ae686a8f81920 Signed-off-by: Dmitry Shmidt --- drivers/mmc/core/sdio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 64909e8479a8..3418a205c716 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1220,6 +1220,7 @@ int sdio_reset_comm(struct mmc_card *card) { struct mmc_host *host = card->host; u32 ocr; + u32 rocr; int err; printk("%s():\n", __func__); @@ -1233,13 +1234,13 @@ int sdio_reset_comm(struct mmc_card *card) if (err) goto err; - host->ocr = mmc_select_voltage(host, ocr); - if (!host->ocr) { + rocr = mmc_select_voltage(host, ocr); + if (!rocr) { err = -EINVAL; goto err; } - err = mmc_send_io_op_cond(host, host->ocr, &ocr); + err = mmc_sdio_init_card(host, rocr, card, 0); if (err) goto err; -- GitLab From 6ddb84e5ff8fb8635ce642333637cf7e81750c11 Mon Sep 17 00:00:00 2001 From: Hosung Kim Date: Mon, 23 Jul 2012 17:33:17 +0900 Subject: [PATCH 0618/1442] ANDROID: mmc: sdio: Fix sdio_reset_comm for sync mmc_sdio_init_card function is doing necessary initialization Change-Id: I7d2e432b2af8a76267378acba07e3e4e8fd1e6bc Signed-off-by: Hosung Kim --- drivers/mmc/core/sdio.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 3418a205c716..324116600333 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1244,44 +1244,6 @@ int sdio_reset_comm(struct mmc_card *card) if (err) goto err; - if (mmc_host_is_spi(host)) { - err = mmc_spi_set_crc(host, use_spi_crc); - if (err) - goto err; - } - - if (!mmc_host_is_spi(host)) { - err = mmc_send_relative_addr(host, &card->rca); - if (err) - goto err; - mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL); - } - if (!mmc_host_is_spi(host)) { - err = mmc_select_card(card); - if (err) - goto err; - } - - /* - * Switch to high-speed (if supported). - */ - err = sdio_enable_hs(card); - if (err > 0) - mmc_sd_go_highspeed(card); - else if (err) - goto err; - - /* - * Change to the card's maximum speed. - */ - mmc_set_clock(host, mmc_sdio_get_max_clock(card)); - - err = sdio_enable_4bit_bus(card); - if (err > 0) - mmc_set_bus_width(host, MMC_BUS_WIDTH_4); - else if (err) - goto err; - mmc_release_host(host); return 0; err: -- GitLab From 6fc8bec2f306e24258649e8a953c474eb9626746 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 7 Oct 2010 14:39:16 -0700 Subject: [PATCH 0619/1442] ANDROID: mmc: Add "ignore mmc pm notify" functionality Change-Id: I20821a82831b07ca037973d5d92e832372c6b583 Signed-off-by: Dmitry Shmidt --- drivers/mmc/core/host.c | 6 ++++-- include/linux/mmc/pm.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 98f25ffb4258..38bb255ebab5 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -431,7 +431,8 @@ int mmc_add_host(struct mmc_host *host) #endif mmc_start_host(host); - mmc_register_pm_notifier(host); + if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY)) + mmc_register_pm_notifier(host); return 0; } @@ -448,7 +449,8 @@ EXPORT_SYMBOL(mmc_add_host); */ void mmc_remove_host(struct mmc_host *host) { - mmc_unregister_pm_notifier(host); + if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY)) + mmc_unregister_pm_notifier(host); mmc_stop_host(host); #ifdef CONFIG_DEBUG_FS diff --git a/include/linux/mmc/pm.h b/include/linux/mmc/pm.h index 4a139204c20c..6e2d6a135c7e 100644 --- a/include/linux/mmc/pm.h +++ b/include/linux/mmc/pm.h @@ -26,5 +26,6 @@ typedef unsigned int mmc_pm_flag_t; #define MMC_PM_KEEP_POWER (1 << 0) /* preserve card power during suspend */ #define MMC_PM_WAKE_SDIO_IRQ (1 << 1) /* wake up host system on SDIO IRQ assertion */ +#define MMC_PM_IGNORE_PM_NOTIFY (1 << 2) /* ignore mmc pm notify */ #endif /* LINUX_MMC_PM_H */ -- GitLab From d775b65ecaf6059ab06d84709e287bb4dae68c78 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 18 Mar 2013 11:57:28 -0700 Subject: [PATCH 0620/1442] ANDROID: mmc: core: Remove stray CONFIG_EXPERIMENTAL dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_EXPERIMENTAL has been removed from the kernel, so clean up its use in MMC_EMBEDDED_SDIO and MMC_PARANOID_SD_INIT options. Change-Id: If414c265134b36740a84564274a631803c8e81b4 Cc: Arve Hjønnevåg Cc: San Mehat Cc: Android Kernel Team Reported-by: Jon Medhurst (Tixy) Signed-off-by: John Stultz --- drivers/mmc/core/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig index 4ec3a4febb1a..daad32f85033 100644 --- a/drivers/mmc/core/Kconfig +++ b/drivers/mmc/core/Kconfig @@ -25,7 +25,6 @@ config PWRSEQ_SIMPLE config MMC_EMBEDDED_SDIO boolean "MMC embedded SDIO device support (EXPERIMENTAL)" - depends on EXPERIMENTAL help If you say Y here, support will be added for embedded SDIO devices which do not contain the necessary enumeration @@ -33,7 +32,6 @@ config MMC_EMBEDDED_SDIO config MMC_PARANOID_SD_INIT bool "Enable paranoid SD card initialization (EXPERIMENTAL)" - depends on EXPERIMENTAL help If you say Y here, the MMC layer will be extra paranoid about re-trying SD init requests. This can be a useful -- GitLab From ef140e794bc2f47f033ffeb0864a51cc9f9a45a8 Mon Sep 17 00:00:00 2001 From: San Mehat Date: Sat, 21 Mar 2009 18:48:54 -0700 Subject: [PATCH 0621/1442] ANDROID: fs: block_dump: Don't display inode changes if block_dump < 2 Signed-off-by: San Mehat --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 05713a5da083..ffec69dfb80d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2104,7 +2104,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) (dirtytime && (inode->i_state & I_DIRTY_INODE))) return; - if (unlikely(block_dump)) + if (unlikely(block_dump > 1)) block_dump___mark_inode_dirty(inode); spin_lock(&inode->i_lock); -- GitLab From a2624d7b9d73c92ca4f333cbf9e299fe95965d67 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Wed, 15 Oct 2008 15:34:49 -0400 Subject: [PATCH 0622/1442] ANDROID: Add android_aid.h Add , our mapping of AID defines to gid numbers. Change-Id: I3a02eb2b5c7e336e3de0cb45d8e04ec82f7281b4 Signed-off-by: Robert Love --- include/linux/android_aid.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/android_aid.h diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h new file mode 100644 index 000000000000..dc66530e5fc7 --- /dev/null +++ b/include/linux/android_aid.h @@ -0,0 +1,25 @@ +/* include/linux/android_aid.h + * + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_ANDROID_AID_H +#define _LINUX_ANDROID_AID_H + +/* AIDs that the kernel treats differently */ +#define AID_OBSOLETE_000 KGIDT_INIT(3001) /* was NET_BT_ADMIN */ +#define AID_OBSOLETE_001 KGIDT_INIT(3002) /* was NET_BT */ +#define AID_INET KGIDT_INIT(3003) +#define AID_NET_RAW KGIDT_INIT(3004) + +#endif -- GitLab From ec2622b0c41f49e3e8bef9b7ac10c59ebc6432c2 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Wed, 15 Oct 2008 15:35:44 -0400 Subject: [PATCH 0623/1442] ANDROID: Paranoid network. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With CONFIG_ANDROID_PARANOID_NETWORK, require specific uids/gids to instantiate network sockets. Signed-off-by: Robert Love paranoid networking: Use in_egroup_p() to check group membership The previous group_search() caused trouble for partners with module builds. in_egroup_p() is also cleaner. Signed-off-by: Nick Pelly Fix 2.6.29 build. Signed-off-by: Arve Hjønnevåg net: Fix compilation of the IPv6 module Fix compilation of the IPv6 module -- current->euid does not exist anymore, current_euid() is what needs to be used. Signed-off-by: Steinar H. Gunderson net: bluetooth: Remove the AID_NET_BT* gid numbers Removed bluetooth checks for AID_NET_BT and AID_NET_BT_ADMIN which are not useful anymore. This is in preparation for getting rid of all the AID_* gids. Change-Id: I879d7181f07532784499ef152288d12a03ab6354 Signed-off-by: JP Abgrall --- net/Kconfig | 6 ++++++ net/bluetooth/af_bluetooth.c | 29 +++++++++++++++++++++++++++++ net/ipv4/af_inet.c | 31 ++++++++++++++++++++++++++++++- net/ipv6/af_inet6.c | 32 +++++++++++++++++++++++++++++++- 4 files changed, 96 insertions(+), 2 deletions(-) diff --git a/net/Kconfig b/net/Kconfig index 7b6cd340b72b..1870b35a7aba 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -89,6 +89,12 @@ source "net/netlabel/Kconfig" endif # if INET +config ANDROID_PARANOID_NETWORK + bool "Only allow certain groups to create sockets" + default y + help + none + config NETWORK_SECMARK bool "Security Marking" help diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1aff2da9bc74..4b325250236f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -106,11 +106,40 @@ void bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); +#ifdef CONFIG_PARANOID_NETWORK +static inline int current_has_bt_admin(void) +{ + return !current_euid(); +} + +static inline int current_has_bt(void) +{ + return current_has_bt_admin(); +} +# else +static inline int current_has_bt_admin(void) +{ + return 1; +} + +static inline int current_has_bt(void) +{ + return 1; +} +#endif + static int bt_sock_create(struct net *net, struct socket *sock, int proto, int kern) { int err; + if (proto == BTPROTO_RFCOMM || proto == BTPROTO_SCO || + proto == BTPROTO_L2CAP) { + if (!current_has_bt()) + return -EPERM; + } else if (!current_has_bt_admin()) + return -EPERM; + if (net != &init_net) return -EAFNOSUPPORT; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 215143246e4b..92ba1dcb5db9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -121,6 +121,9 @@ #endif #include +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include +#endif /* The inetsw table contains everything that inet_create needs to * build a new socket. @@ -237,6 +240,29 @@ int inet_listen(struct socket *sock, int backlog) } EXPORT_SYMBOL(inet_listen); +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +static inline int current_has_network(void) +{ + return (!current_euid() || in_egroup_p(AID_INET) || + in_egroup_p(AID_NET_RAW)); +} +static inline int current_has_cap(struct net *net, int cap) +{ + if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW)) + return 1; + return ns_capable(net->user_ns, cap); +} +# else +static inline int current_has_network(void) +{ + return 1; +} +static inline int current_has_cap(struct net *net, int cap) +{ + return ns_capable(net->user_ns, cap); +} +#endif + /* * Create an inet socket. */ @@ -255,6 +281,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= IPPROTO_MAX) return -EINVAL; + if (!current_has_network()) + return -EACCES; + sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ @@ -304,7 +333,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, err = -EPERM; if (sock->type == SOCK_RAW && !kern && - !ns_capable(net->user_ns, CAP_NET_RAW)) + !current_has_cap(net, CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 46ad699937fd..b48e91c7390c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -65,6 +65,10 @@ #include #include +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include +#endif + #include "ip6_offload.h" MODULE_AUTHOR("Cast of dozens"); @@ -106,6 +110,29 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +static inline int current_has_network(void) +{ + return (!current_euid() || in_egroup_p(AID_INET) || + in_egroup_p(AID_NET_RAW)); +} +static inline int current_has_cap(struct net *net, int cap) +{ + if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW)) + return 1; + return ns_capable(net->user_ns, cap); +} +# else +static inline int current_has_network(void) +{ + return 1; +} +static inline int current_has_cap(struct net *net, int cap) +{ + return ns_capable(net->user_ns, cap); +} +#endif + static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -121,6 +148,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= IPPROTO_MAX) return -EINVAL; + if (!current_has_network()) + return -EACCES; + /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; @@ -168,7 +198,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, err = -EPERM; if (sock->type == SOCK_RAW && !kern && - !ns_capable(net->user_ns, CAP_NET_RAW)) + !current_has_cap(net, CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; -- GitLab From 15caf71a3fc8af56e895e80e5aa6060ed945f37b Mon Sep 17 00:00:00 2001 From: Chia-chi Yeh Date: Fri, 19 Jun 2009 07:15:05 +0800 Subject: [PATCH 0624/1442] ANDROID: security: Add AID_NET_RAW and AID_NET_ADMIN capability check in cap_capable(). Signed-off-by: Chia-chi Yeh --- include/linux/android_aid.h | 1 + security/commoncap.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h index dc66530e5fc7..3d7a5ead1200 100644 --- a/include/linux/android_aid.h +++ b/include/linux/android_aid.h @@ -21,5 +21,6 @@ #define AID_OBSOLETE_001 KGIDT_INIT(3002) /* was NET_BT */ #define AID_INET KGIDT_INIT(3003) #define AID_NET_RAW KGIDT_INIT(3004) +#define AID_NET_ADMIN KGIDT_INIT(3005) #endif diff --git a/security/commoncap.c b/security/commoncap.c index 8df676fbd393..8816e499335e 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -31,6 +31,10 @@ #include #include +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include +#endif + /* * If a non-root user executes a setuid-root binary in * !secure(SECURE_NOROOT) mode, then we raise capabilities. @@ -73,6 +77,11 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, { struct user_namespace *ns = targ_ns; + if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW)) + return 0; + if (cap == CAP_NET_ADMIN && in_egroup_p(AID_NET_ADMIN)) + return 0; + /* See if cred has the capability in the target user namespace * by examining the target user namespace and all of the target * user namespace's parents. -- GitLab From c003a30a16fe948ceceea88487e7afe45df6c123 Mon Sep 17 00:00:00 2001 From: Chia-chi Yeh Date: Tue, 30 Jun 2009 11:23:04 +0800 Subject: [PATCH 0625/1442] ANDROID: net: Replace AID_NET_RAW checks with capable(CAP_NET_RAW). Signed-off-by: Chia-chi Yeh --- net/ipv4/af_inet.c | 36 +++++++++++------------------------- net/ipv6/af_inet6.c | 36 +++++++++++------------------------- 2 files changed, 22 insertions(+), 50 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 92ba1dcb5db9..c836bfeb4f43 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -123,6 +123,16 @@ #ifdef CONFIG_ANDROID_PARANOID_NETWORK #include + +static inline int current_has_network(void) +{ + return in_egroup_p(AID_INET) || capable(CAP_NET_RAW); +} +#else +static inline int current_has_network(void) +{ + return 1; +} #endif /* The inetsw table contains everything that inet_create needs to @@ -240,29 +250,6 @@ int inet_listen(struct socket *sock, int backlog) } EXPORT_SYMBOL(inet_listen); -#ifdef CONFIG_ANDROID_PARANOID_NETWORK -static inline int current_has_network(void) -{ - return (!current_euid() || in_egroup_p(AID_INET) || - in_egroup_p(AID_NET_RAW)); -} -static inline int current_has_cap(struct net *net, int cap) -{ - if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW)) - return 1; - return ns_capable(net->user_ns, cap); -} -# else -static inline int current_has_network(void) -{ - return 1; -} -static inline int current_has_cap(struct net *net, int cap) -{ - return ns_capable(net->user_ns, cap); -} -#endif - /* * Create an inet socket. */ @@ -332,8 +319,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, } err = -EPERM; - if (sock->type == SOCK_RAW && !kern && - !current_has_cap(net, CAP_NET_RAW)) + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b48e91c7390c..56297dc0534b 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -67,6 +67,16 @@ #ifdef CONFIG_ANDROID_PARANOID_NETWORK #include + +static inline int current_has_network(void) +{ + return in_egroup_p(AID_INET) || capable(CAP_NET_RAW); +} +#else +static inline int current_has_network(void) +{ + return 1; +} #endif #include "ip6_offload.h" @@ -110,29 +120,6 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -#ifdef CONFIG_ANDROID_PARANOID_NETWORK -static inline int current_has_network(void) -{ - return (!current_euid() || in_egroup_p(AID_INET) || - in_egroup_p(AID_NET_RAW)); -} -static inline int current_has_cap(struct net *net, int cap) -{ - if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW)) - return 1; - return ns_capable(net->user_ns, cap); -} -# else -static inline int current_has_network(void) -{ - return 1; -} -static inline int current_has_cap(struct net *net, int cap) -{ - return ns_capable(net->user_ns, cap); -} -#endif - static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -197,8 +184,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, } err = -EPERM; - if (sock->type == SOCK_RAW && !kern && - !current_has_cap(net, CAP_NET_RAW)) + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; -- GitLab From be916e845b2b36bed2cb64ddaa3a034c461c7ae2 Mon Sep 17 00:00:00 2001 From: Chia-chi Yeh Date: Fri, 15 Jul 2011 15:32:57 -0700 Subject: [PATCH 0626/1442] ANDROID: net: Only NET_ADMIN is allowed to fully control TUN interfaces. Signed-off-by: Chia-chi Yeh --- drivers/net/tun.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index db6acecabeaa..929dafb86458 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1991,6 +1991,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int le; int ret; +#ifdef CONFIG_ANDROID_PARANOID_NETWORK + if (cmd != TUNGETIFF && !capable(CAP_NET_ADMIN)) { + return -EPERM; + } +#endif + if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; -- GitLab From 41b377f5495f16a193546cf9cb09fb72cfbb07f5 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Thu, 31 Jul 2008 11:12:44 -0400 Subject: [PATCH 0627/1442] ANDROID: sysfs_net_ipv4: Add sysfs-based knobs for controlling TCP window size Add a family of knobs to /sys/kernel/ipv4 for controlling the TCP window size: tcp_wmem_min tcp_wmem_def tcp_wmem_max tcp_rmem_min tcp_rmem_def tcp_rmem_max This six values mirror the sysctl knobs in /proc/sys/net/ipv4/tcp_wmem and /proc/sys/net/ipv4/tcp_rmem. Sysfs, unlike sysctl, allows us to set and manage the files' permissions and owners. Signed-off-by: Robert Love --- net/ipv4/Makefile | 1 + net/ipv4/sysfs_net_ipv4.c | 88 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 net/ipv4/sysfs_net_ipv4.c diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index bc6a6c8b9bcd..a8b934aa9d84 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -16,6 +16,7 @@ obj-y := route.o inetpeer.o protocol.o \ obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o +obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c new file mode 100644 index 000000000000..0cbbf10026a6 --- /dev/null +++ b/net/ipv4/sysfs_net_ipv4.c @@ -0,0 +1,88 @@ +/* + * net/ipv4/sysfs_net_ipv4.c + * + * sysfs-based networking knobs (so we can, unlike with sysctl, control perms) + * + * Copyright (C) 2008 Google, Inc. + * + * Robert Love + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include + +#define CREATE_IPV4_FILE(_name, _var) \ +static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%d\n", _var); \ +} \ +static ssize_t _name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + int val, ret; \ + ret = sscanf(buf, "%d", &val); \ + if (ret != 1) \ + return -EINVAL; \ + if (val < 0) \ + return -EINVAL; \ + _var = val; \ + return count; \ +} \ +static struct kobj_attribute _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]); +CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]); +CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]); + +CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]); +CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]); +CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]); + +static struct attribute *ipv4_attrs[] = { + &tcp_wmem_min_attr.attr, + &tcp_wmem_def_attr.attr, + &tcp_wmem_max_attr.attr, + &tcp_rmem_min_attr.attr, + &tcp_rmem_def_attr.attr, + &tcp_rmem_max_attr.attr, + NULL +}; + +static struct attribute_group ipv4_attr_group = { + .attrs = ipv4_attrs, +}; + +static __init int sysfs_ipv4_init(void) +{ + struct kobject *ipv4_kobject; + int ret; + + ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj); + if (!ipv4_kobject) + return -ENOMEM; + + ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group); + if (ret) { + kobject_put(ipv4_kobject); + return ret; + } + + return 0; +} + +subsys_initcall(sysfs_ipv4_init); -- GitLab From 5ebc1fc490879cadc45af88f5807fa2e24a12041 Mon Sep 17 00:00:00 2001 From: Chia-chi Yeh Date: Fri, 8 May 2009 04:02:40 +0800 Subject: [PATCH 0628/1442] ANDROID: net: add PPP on L2TP Access Concentrator (PPPoLAC) driver. Change-Id: I3ae3ee7520951ae24269db0ef2898c6455cf6bcc Signed-off-by: Chia-chi Yeh ppolac: dont include px_proto define in if_pppolac.h Change-Id: I55bc9cf91ea0e9e8f7bf5d6e241d188e1269343a Signed-off-by: Dima Zavin --- drivers/net/ppp/Kconfig | 9 + drivers/net/ppp/Makefile | 1 + drivers/net/ppp/pppolac.c | 359 ++++++++++++++++++++++++++++++++++ include/linux/if_pppolac.h | 33 ++++ include/linux/if_pppox.h | 9 + include/uapi/linux/if_pppox.h | 4 +- 6 files changed, 414 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ppp/pppolac.c create mode 100644 include/linux/if_pppolac.h diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig index 1373c6d7278d..b092b5e25c83 100644 --- a/drivers/net/ppp/Kconfig +++ b/drivers/net/ppp/Kconfig @@ -149,6 +149,15 @@ config PPPOL2TP tunnels. L2TP is replacing PPTP for VPN uses. if TTY +config PPPOLAC + tristate "PPP on L2TP Access Concentrator" + depends on PPP && INET + help + L2TP (RFC 2661) is a tunneling protocol widely used in virtual private + networks. This driver handles L2TP data packets between a UDP socket + and a PPP channel, but only permits one session per socket. Thus it is + fairly simple and suited for clients. + config PPP_ASYNC tristate "PPP support for async serial ports" depends on PPP diff --git a/drivers/net/ppp/Makefile b/drivers/net/ppp/Makefile index a6b6297b0066..f14406e0c388 100644 --- a/drivers/net/ppp/Makefile +++ b/drivers/net/ppp/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_PPP_SYNC_TTY) += ppp_synctty.o obj-$(CONFIG_PPPOE) += pppox.o pppoe.o obj-$(CONFIG_PPPOL2TP) += pppox.o obj-$(CONFIG_PPTP) += pppox.o pptp.o +obj-$(CONFIG_PPPOLAC) += pppox.o pppolac.o diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c new file mode 100644 index 000000000000..8843a9d30911 --- /dev/null +++ b/drivers/net/ppp/pppolac.c @@ -0,0 +1,359 @@ +/* drivers/net/pppolac.c + * + * Driver for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661) + * + * Copyright (C) 2009 Google, Inc. + * Author: Chia-chi Yeh + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* This driver handles L2TP data packets between a UDP socket and a PPP channel. + * To keep things simple, only one session per socket is permitted. Packets are + * sent via the socket, so it must keep connected to the same address. One must + * not set sequencing in ICCN but let LNS controll it. Currently this driver + * only works on IPv4 due to the lack of UDP encapsulation support in IPv6. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define L2TP_CONTROL_MASK 0x80 +#define L2TP_VERSION_MASK 0x0F +#define L2TP_VERSION 0x02 +#define L2TP_LENGTH_MASK 0x40 +#define L2TP_OFFSET_MASK 0x02 +#define L2TP_SEQUENCE_MASK 0x08 + +#define PPP_ADDR 0xFF +#define PPP_CTRL 0x03 + +union unaligned { + __u32 u32; +} __attribute__((packed)); + +static inline union unaligned *unaligned(void *ptr) +{ + return (union unaligned *)ptr; +} + +static int pppolac_recv(struct sock *sk_udp, struct sk_buff *skb) +{ + struct sock *sk; + struct pppolac_opt *opt; + __u8 bits; + __u8 *ptr; + + /* Drop the packet if it is too short. */ + if (skb->len < sizeof(struct udphdr) + 6) + goto drop; + + /* Put it back if it is a control packet. */ + if (skb->data[sizeof(struct udphdr)] & L2TP_CONTROL_MASK) + return 1; + + /* Now the packet is ours. Skip UDP header. */ + skb_pull(skb, sizeof(struct udphdr)); + + /* Check the version. */ + if ((skb->data[1] & L2TP_VERSION_MASK) != L2TP_VERSION) + goto drop; + bits = skb->data[0]; + ptr = &skb->data[2]; + + /* Check the length if it is present. */ + if (bits & L2TP_LENGTH_MASK) { + if ((ptr[0] << 8 | ptr[1]) != skb->len) + goto drop; + ptr += 2; + } + + /* Skip all fields including optional ones. */ + if (!skb_pull(skb, 6 + (bits & L2TP_SEQUENCE_MASK ? 4 : 0) + + (bits & L2TP_LENGTH_MASK ? 2 : 0) + + (bits & L2TP_OFFSET_MASK ? 2 : 0))) + goto drop; + + /* Skip the offset padding if it is present. */ + if (bits & L2TP_OFFSET_MASK && + !skb_pull(skb, skb->data[-2] << 8 | skb->data[-1])) + goto drop; + + /* Now ptr is pointing to the tunnel and skb is pointing to the payload. + * We have to lock sk_udp to prevent sk from being closed. */ + lock_sock(sk_udp); + sk = sk_udp->sk_user_data; + if (!sk) { + release_sock(sk_udp); + goto drop; + } + sock_hold(sk); + release_sock(sk_udp); + opt = &pppox_sk(sk)->proto.lac; + + /* Check the tunnel and the session. */ + if (unaligned(ptr)->u32 != opt->local) { + sock_put(sk); + goto drop; + } + + /* Check the sequence if it is present. According to RFC 2661 page 10 + * and 43, the only thing to do is updating opt->sequencing. */ + opt->sequencing = bits & L2TP_SEQUENCE_MASK; + + /* Skip PPP address and control if they are present. */ + if (skb->len >= 2 && skb->data[0] == PPP_ADDR && + skb->data[1] == PPP_CTRL) + skb_pull(skb, 2); + + /* Fix PPP protocol if it is compressed. */ + if (skb->len >= 1 && skb->data[0] & 1) + skb_push(skb, 1)[0] = 0; + + /* Finally, deliver the packet to PPP channel. We have to lock sk to + * prevent another thread from calling pppox_unbind_sock(). */ + skb_orphan(skb); + lock_sock(sk); + ppp_input(&pppox_sk(sk)->chan, skb); + release_sock(sk); + sock_put(sk); + return 0; + +drop: + kfree_skb(skb); + return 0; +} + +static int pppolac_xmit(struct ppp_channel *chan, struct sk_buff *skb) +{ + struct sock *sk_udp = (struct sock *)chan->private; + struct pppolac_opt *opt = &pppox_sk(sk_udp->sk_user_data)->proto.lac; + struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT}; + struct kvec iov; + + /* Install PPP address and control. */ + skb_push(skb, 2); + skb->data[0] = PPP_ADDR; + skb->data[1] = PPP_CTRL; + + /* Install L2TP header. */ + if (opt->sequencing) { + skb_push(skb, 10); + skb->data[0] = L2TP_SEQUENCE_MASK; + skb->data[6] = opt->sequence >> 8; + skb->data[7] = opt->sequence; + skb->data[8] = 0; + skb->data[9] = 0; + opt->sequence++; + } else { + skb_push(skb, 6); + skb->data[0] = 0; + } + skb->data[1] = L2TP_VERSION; + unaligned(&skb->data[2])->u32 = opt->remote; + + /* Now send the packet via UDP socket. */ + iov.iov_base = skb->data; + iov.iov_len = skb->len; + kernel_sendmsg(sk_udp->sk_socket, &msg, &iov, 1, skb->len); + kfree_skb(skb); + return 1; +} + +/******************************************************************************/ + +static struct ppp_channel_ops pppolac_channel_ops = { + .start_xmit = pppolac_xmit, +}; + +static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr, + int addrlen, int flags) +{ + struct sock *sk = sock->sk; + struct pppox_sock *po = pppox_sk(sk); + struct sockaddr_pppolac *addr = (struct sockaddr_pppolac *)useraddr; + struct socket *sock_udp = NULL; + struct sock *sk_udp; + int error; + + if (addrlen != sizeof(struct sockaddr_pppolac) || + !addr->local.tunnel || !addr->local.session || + !addr->remote.tunnel || !addr->remote.session) { + return -EINVAL; + } + + lock_sock(sk); + error = -EALREADY; + if (sk->sk_state != PPPOX_NONE) + goto out; + + sock_udp = sockfd_lookup(addr->udp_socket, &error); + if (!sock_udp) + goto out; + sk_udp = sock_udp->sk; + lock_sock(sk_udp); + + /* Remove this check when IPv6 supports UDP encapsulation. */ + error = -EAFNOSUPPORT; + if (sk_udp->sk_family != AF_INET) + goto out; + error = -EPROTONOSUPPORT; + if (sk_udp->sk_protocol != IPPROTO_UDP) + goto out; + error = -EDESTADDRREQ; + if (sk_udp->sk_state != TCP_ESTABLISHED) + goto out; + error = -EBUSY; + if (udp_sk(sk_udp)->encap_type || sk_udp->sk_user_data) + goto out; + + po->chan.hdrlen = 12; + po->chan.private = sk_udp; + po->chan.ops = &pppolac_channel_ops; + po->chan.mtu = PPP_MTU - 80; + po->proto.lac.local = unaligned(&addr->local)->u32; + po->proto.lac.remote = unaligned(&addr->remote)->u32; + + error = ppp_register_channel(&po->chan); + if (error) + goto out; + + sk->sk_state = PPPOX_CONNECTED; + udp_sk(sk_udp)->encap_type = UDP_ENCAP_L2TPINUDP; + udp_sk(sk_udp)->encap_rcv = pppolac_recv; + sk_udp->sk_user_data = sk; + +out: + if (sock_udp) { + release_sock(sk_udp); + if (error) + sockfd_put(sock_udp); + } + release_sock(sk); + return error; +} + +static int pppolac_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (!sk) + return 0; + + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD)) { + release_sock(sk); + return -EBADF; + } + + if (sk->sk_state != PPPOX_NONE) { + struct sock *sk_udp = (struct sock *)pppox_sk(sk)->chan.private; + lock_sock(sk_udp); + + pppox_unbind_sock(sk); + sk_udp->sk_user_data = NULL; + udp_sk(sk_udp)->encap_type = 0; + udp_sk(sk_udp)->encap_rcv = NULL; + + release_sock(sk_udp); + sockfd_put(sk_udp->sk_socket); + } + + sock_orphan(sk); + sock->sk = NULL; + release_sock(sk); + sock_put(sk); + return 0; +} + +/******************************************************************************/ + +static struct proto pppolac_proto = { + .name = "PPPOLAC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct pppox_sock), +}; + +static struct proto_ops pppolac_proto_ops = { + .family = PF_PPPOX, + .owner = THIS_MODULE, + .release = pppolac_release, + .bind = sock_no_bind, + .connect = pppolac_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = sock_no_poll, + .ioctl = pppox_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .mmap = sock_no_mmap, +}; + +static int pppolac_create(struct net *net, struct socket *sock) +{ + struct sock *sk; + + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppolac_proto); + if (!sk) + return -ENOMEM; + + sock_init_data(sock, sk); + sock->state = SS_UNCONNECTED; + sock->ops = &pppolac_proto_ops; + sk->sk_protocol = PX_PROTO_OLAC; + sk->sk_state = PPPOX_NONE; + return 0; +} + +/******************************************************************************/ + +static struct pppox_proto pppolac_pppox_proto = { + .create = pppolac_create, + .owner = THIS_MODULE, +}; + +static int __init pppolac_init(void) +{ + int error; + + error = proto_register(&pppolac_proto, 0); + if (error) + return error; + + error = register_pppox_proto(PX_PROTO_OLAC, &pppolac_pppox_proto); + if (error) + proto_unregister(&pppolac_proto); + return error; +} + +static void __exit pppolac_exit(void) +{ + unregister_pppox_proto(PX_PROTO_OLAC); + proto_unregister(&pppolac_proto); +} + +module_init(pppolac_init); +module_exit(pppolac_exit); + +MODULE_DESCRIPTION("PPP on L2TP Access Concentrator (PPPoLAC)"); +MODULE_AUTHOR("Chia-chi Yeh "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/if_pppolac.h b/include/linux/if_pppolac.h new file mode 100644 index 000000000000..c06bd6c8ba26 --- /dev/null +++ b/include/linux/if_pppolac.h @@ -0,0 +1,33 @@ +/* include/linux/if_pppolac.h + * + * Header for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661) + * + * Copyright (C) 2009 Google, Inc. + * Author: Chia-chi Yeh + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_IF_PPPOLAC_H +#define __LINUX_IF_PPPOLAC_H + +#include +#include + +struct sockaddr_pppolac { + sa_family_t sa_family; /* AF_PPPOX */ + unsigned int sa_protocol; /* PX_PROTO_OLAC */ + int udp_socket; + struct __attribute__((packed)) { + __u16 tunnel, session; + } local, remote; +} __attribute__((packed)); + +#endif /* __LINUX_IF_PPPOLAC_H */ diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index ba7a9b0c7c57..fc9cb260af7f 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -43,6 +43,14 @@ struct pptp_opt { u32 seq_sent, seq_recv; int ppp_flags; }; + +struct pppolac_opt { + __u32 local; + __u32 remote; + __u16 sequence; + __u8 sequencing; +}; + #include struct pppox_sock { @@ -53,6 +61,7 @@ struct pppox_sock { union { struct pppoe_opt pppoe; struct pptp_opt pptp; + struct pppolac_opt lac; } proto; __be16 num; }; diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index d37bbb17a007..956bd71841e2 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,8 @@ struct pptp_addr { #define PX_PROTO_OE 0 /* Currently just PPPoE */ #define PX_PROTO_OL2TP 1 /* Now L2TP also */ #define PX_PROTO_PPTP 2 -#define PX_MAX_PROTO 3 +#define PX_PROTO_OLAC 3 +#define PX_MAX_PROTO 4 struct sockaddr_pppox { __kernel_sa_family_t sa_family; /* address family, AF_PPPOX */ -- GitLab From 70511fd9393c6110623d4b1fabf9204ecde38056 Mon Sep 17 00:00:00 2001 From: Chia-chi Yeh Date: Fri, 12 Jun 2009 01:09:30 +0800 Subject: [PATCH 0629/1442] ANDROID: net: add PPP on PPTP Network Server (PPPoPNS) driver. Signed-off-by: Chia-chi Yeh ppopns: dont include px_proto define in if_pppopns.h Change-Id: I27e687667db5b45182562f4a517a2e6cec6b1350 Signed-off-by: Dima Zavin --- drivers/net/ppp/Kconfig | 8 + drivers/net/ppp/Makefile | 1 + drivers/net/ppp/pppopns.c | 322 ++++++++++++++++++++++++++++++++++ include/linux/if_pppopns.h | 32 ++++ include/linux/if_pppox.h | 7 + include/uapi/linux/if_pppox.h | 4 +- 6 files changed, 373 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ppp/pppopns.c create mode 100644 include/linux/if_pppopns.h diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig index b092b5e25c83..282aec4860eb 100644 --- a/drivers/net/ppp/Kconfig +++ b/drivers/net/ppp/Kconfig @@ -158,6 +158,14 @@ config PPPOLAC and a PPP channel, but only permits one session per socket. Thus it is fairly simple and suited for clients. +config PPPOPNS + tristate "PPP on PPTP Network Server" + depends on PPP && INET + help + PPTP (RFC 2637) is a tunneling protocol widely used in virtual private + networks. This driver handles PPTP data packets between a RAW socket + and a PPP channel. It is fairly simple and easy to use. + config PPP_ASYNC tristate "PPP support for async serial ports" depends on PPP diff --git a/drivers/net/ppp/Makefile b/drivers/net/ppp/Makefile index f14406e0c388..d283d03c4683 100644 --- a/drivers/net/ppp/Makefile +++ b/drivers/net/ppp/Makefile @@ -12,3 +12,4 @@ obj-$(CONFIG_PPPOE) += pppox.o pppoe.o obj-$(CONFIG_PPPOL2TP) += pppox.o obj-$(CONFIG_PPTP) += pppox.o pptp.o obj-$(CONFIG_PPPOLAC) += pppox.o pppolac.o +obj-$(CONFIG_PPPOPNS) += pppox.o pppopns.o diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c new file mode 100644 index 000000000000..8885eba8968e --- /dev/null +++ b/drivers/net/ppp/pppopns.c @@ -0,0 +1,322 @@ +/* drivers/net/pppopns.c + * + * Driver for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637) + * + * Copyright (C) 2009 Google, Inc. + * Author: Chia-chi Yeh + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* This driver handles PPTP data packets between a RAW socket and a PPP channel. + * The socket is created in the kernel space and connected to the same address + * of the control socket. To keep things simple, packets are always sent with + * sequence but without acknowledgement. This driver should work on both IPv4 + * and IPv6. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GRE_HEADER_SIZE 8 + +#define PPTP_GRE_MASK htons(0x2001) +#define PPTP_GRE_SEQ_MASK htons(0x1000) +#define PPTP_GRE_ACK_MASK htons(0x0080) +#define PPTP_GRE_TYPE htons(0x880B) + +#define PPP_ADDR 0xFF +#define PPP_CTRL 0x03 + +struct header { + __u16 bits; + __u16 type; + __u16 length; + __u16 call; + __u32 sequence; +} __attribute__((packed)); + +static void pppopns_recv(struct sock *sk_raw, int length) +{ + struct sock *sk; + struct pppopns_opt *opt; + struct sk_buff *skb; + struct header *hdr; + + /* Lock sk_raw to prevent sk from being closed. */ + lock_sock(sk_raw); + sk = (struct sock *)sk_raw->sk_user_data; + if (!sk) { + release_sock(sk_raw); + return; + } + sock_hold(sk); + release_sock(sk_raw); + opt = &pppox_sk(sk)->proto.pns; + + /* Process packets from the receive queue. */ + while ((skb = skb_dequeue(&sk_raw->sk_receive_queue))) { + skb_pull(skb, skb_transport_header(skb) - skb->data); + + /* Drop the packet if it is too short. */ + if (skb->len < GRE_HEADER_SIZE) + goto drop; + + /* Check the header. */ + hdr = (struct header *)skb->data; + if (hdr->type != PPTP_GRE_TYPE || hdr->call != opt->local || + (hdr->bits & PPTP_GRE_MASK) != PPTP_GRE_MASK) + goto drop; + + /* Skip all fields including optional ones. */ + if (!skb_pull(skb, GRE_HEADER_SIZE + + (hdr->bits & PPTP_GRE_SEQ_MASK ? 4 : 0) + + (hdr->bits & PPTP_GRE_ACK_MASK ? 4 : 0))) + goto drop; + + /* Check the length. */ + if (skb->len != ntohs(hdr->length)) + goto drop; + + /* Skip PPP address and control if they are present. */ + if (skb->len >= 2 && skb->data[0] == PPP_ADDR && + skb->data[1] == PPP_CTRL) + skb_pull(skb, 2); + + /* Fix PPP protocol if it is compressed. */ + if (skb->len >= 1 && skb->data[0] & 1) + skb_push(skb, 1)[0] = 0; + + /* Deliver the packet to PPP channel. We have to lock sk to + * prevent another thread from calling pppox_unbind_sock(). */ + skb_orphan(skb); + lock_sock(sk); + ppp_input(&pppox_sk(sk)->chan, skb); + release_sock(sk); + continue; +drop: + kfree_skb(skb); + } + sock_put(sk); +} + +static int pppopns_xmit(struct ppp_channel *chan, struct sk_buff *skb) +{ + struct sock *sk_raw = (struct sock *)chan->private; + struct pppopns_opt *opt = &pppox_sk(sk_raw->sk_user_data)->proto.pns; + struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT}; + struct kvec iov; + struct header *hdr; + __u16 length; + + /* Install PPP address and control. */ + skb_push(skb, 2); + skb->data[0] = PPP_ADDR; + skb->data[1] = PPP_CTRL; + length = skb->len; + + /* Install PPTP GRE header. */ + hdr = (struct header *)skb_push(skb, 12); + hdr->bits = PPTP_GRE_MASK | PPTP_GRE_SEQ_MASK; + hdr->type = PPTP_GRE_TYPE; + hdr->length = htons(length); + hdr->call = opt->remote; + hdr->sequence = htonl(opt->sequence); + opt->sequence++; + + /* Now send the packet via RAW socket. */ + iov.iov_base = skb->data; + iov.iov_len = skb->len; + kernel_sendmsg(sk_raw->sk_socket, &msg, &iov, 1, skb->len); + kfree_skb(skb); + return 1; +} + +/******************************************************************************/ + +static struct ppp_channel_ops pppopns_channel_ops = { + .start_xmit = pppopns_xmit, +}; + +static int pppopns_connect(struct socket *sock, struct sockaddr *useraddr, + int addrlen, int flags) +{ + struct sock *sk = sock->sk; + struct pppox_sock *po = pppox_sk(sk); + struct sockaddr_pppopns *addr = (struct sockaddr_pppopns *)useraddr; + struct sockaddr_storage ss; + struct socket *sock_tcp = NULL; + struct socket *sock_raw = NULL; + struct sock *sk_raw; + int error; + + if (addrlen != sizeof(struct sockaddr_pppopns)) + return -EINVAL; + + lock_sock(sk); + error = -EALREADY; + if (sk->sk_state != PPPOX_NONE) + goto out; + + sock_tcp = sockfd_lookup(addr->tcp_socket, &error); + if (!sock_tcp) + goto out; + error = -EPROTONOSUPPORT; + if (sock_tcp->sk->sk_protocol != IPPROTO_TCP) + goto out; + addrlen = sizeof(struct sockaddr_storage); + error = kernel_getpeername(sock_tcp, (struct sockaddr *)&ss, &addrlen); + if (error) + goto out; + + error = sock_create(ss.ss_family, SOCK_RAW, IPPROTO_GRE, &sock_raw); + if (error) + goto out; + error = kernel_connect(sock_raw, (struct sockaddr *)&ss, addrlen, 0); + if (error) + goto out; + sk_raw = sock_raw->sk; + + po->chan.hdrlen = 14; + po->chan.private = sk_raw; + po->chan.ops = &pppopns_channel_ops; + po->chan.mtu = PPP_MTU - 80; + po->proto.pns.local = addr->local; + po->proto.pns.remote = addr->remote; + + error = ppp_register_channel(&po->chan); + if (error) + goto out; + + sk->sk_state = PPPOX_CONNECTED; + sk_raw->sk_user_data = sk; + sk_raw->sk_data_ready = pppopns_recv; + +out: + if (sock_tcp) + sockfd_put(sock_tcp); + if (error && sock_raw) + sock_release(sock_raw); + release_sock(sk); + return error; +} + +static int pppopns_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (!sk) + return 0; + + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD)) { + release_sock(sk); + return -EBADF; + } + + if (sk->sk_state != PPPOX_NONE) { + struct sock *sk_raw = (struct sock *)pppox_sk(sk)->chan.private; + lock_sock(sk_raw); + pppox_unbind_sock(sk); + sk_raw->sk_user_data = NULL; + release_sock(sk_raw); + sock_release(sk_raw->sk_socket); + } + + sock_orphan(sk); + sock->sk = NULL; + release_sock(sk); + sock_put(sk); + return 0; +} + +/******************************************************************************/ + +static struct proto pppopns_proto = { + .name = "PPPOPNS", + .owner = THIS_MODULE, + .obj_size = sizeof(struct pppox_sock), +}; + +static struct proto_ops pppopns_proto_ops = { + .family = PF_PPPOX, + .owner = THIS_MODULE, + .release = pppopns_release, + .bind = sock_no_bind, + .connect = pppopns_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = sock_no_poll, + .ioctl = pppox_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .mmap = sock_no_mmap, +}; + +static int pppopns_create(struct net *net, struct socket *sock) +{ + struct sock *sk; + + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppopns_proto); + if (!sk) + return -ENOMEM; + + sock_init_data(sock, sk); + sock->state = SS_UNCONNECTED; + sock->ops = &pppopns_proto_ops; + sk->sk_protocol = PX_PROTO_OPNS; + sk->sk_state = PPPOX_NONE; + return 0; +} + +/******************************************************************************/ + +static struct pppox_proto pppopns_pppox_proto = { + .create = pppopns_create, + .owner = THIS_MODULE, +}; + +static int __init pppopns_init(void) +{ + int error; + + error = proto_register(&pppopns_proto, 0); + if (error) + return error; + + error = register_pppox_proto(PX_PROTO_OPNS, &pppopns_pppox_proto); + if (error) + proto_unregister(&pppopns_proto); + return error; +} + +static void __exit pppopns_exit(void) +{ + unregister_pppox_proto(PX_PROTO_OPNS); + proto_unregister(&pppopns_proto); +} + +module_init(pppopns_init); +module_exit(pppopns_exit); + +MODULE_DESCRIPTION("PPP on PPTP Network Server (PPPoPNS)"); +MODULE_AUTHOR("Chia-chi Yeh "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/if_pppopns.h b/include/linux/if_pppopns.h new file mode 100644 index 000000000000..0cf34b4d551f --- /dev/null +++ b/include/linux/if_pppopns.h @@ -0,0 +1,32 @@ +/* include/linux/if_pppopns.h + * + * Header for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637) + * + * Copyright (C) 2009 Google, Inc. + * Author: Chia-chi Yeh + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_IF_PPPOPNS_H +#define __LINUX_IF_PPPOPNS_H + +#include +#include + +struct sockaddr_pppopns { + sa_family_t sa_family; /* AF_PPPOX */ + unsigned int sa_protocol; /* PX_PROTO_OPNS */ + int tcp_socket; + __u16 local; + __u16 remote; +} __attribute__((packed)); + +#endif /* __LINUX_IF_PPPOPNS_H */ diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index fc9cb260af7f..2ac23c30bcfb 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -51,6 +51,12 @@ struct pppolac_opt { __u8 sequencing; }; +struct pppopns_opt { + __u16 local; + __u16 remote; + __u32 sequence; +}; + #include struct pppox_sock { @@ -62,6 +68,7 @@ struct pppox_sock { struct pppoe_opt pppoe; struct pptp_opt pptp; struct pppolac_opt lac; + struct pppopns_opt pns; } proto; __be16 num; }; diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index 956bd71841e2..6aad18a517d3 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -61,7 +62,8 @@ struct pptp_addr { #define PX_PROTO_OL2TP 1 /* Now L2TP also */ #define PX_PROTO_PPTP 2 #define PX_PROTO_OLAC 3 -#define PX_MAX_PROTO 4 +#define PX_PROTO_OPNS 4 +#define PX_MAX_PROTO 5 struct sockaddr_pppox { __kernel_sa_family_t sa_family; /* address family, AF_PPPOX */ -- GitLab From 32f7ae969b9e2881d034ca3009eb0bf28b067749 Mon Sep 17 00:00:00 2001 From: Chia-chi Yeh Date: Sat, 13 Jun 2009 02:29:04 +0800 Subject: [PATCH 0630/1442] ANDROID: net: PPPoPNS and PPPoLAC fixes. net: Fix a bitmask in PPPoPNS and rename constants in PPPoPNS and PPPoLAC. Signed-off-by: Chia-chi Yeh net: Fix a potential deadlock while releasing PPPoLAC/PPPoPNS socket. PPP driver guarantees that no thread will be executing start_xmit() after returning from ppp_unregister_channel(). To achieve this, a spinlock (downl) is used. In pppolac_release(), ppp_unregister_channel() is called after sk_udp is locked. At the same time, another thread might be running in pppolac_xmit() with downl. Thus a deadlock will occur if the thread tries to lock sk_udp. The same situation might happen on sk_raw in pppopns_release(). Signed-off-by: Chia-chi Yeh net: Force PPPoLAC and PPPoPNS to bind an interface before creating PPP channel. It is common to manipulate the routing table after configuring PPP device. Since both PPPoLAC and PPPoPNS run over IP, care must be taken to make sure that there is no loop in the routing table. Although this can be done by adding a host route, it might still cause problems when the interface is down for some reason. To solve this, this patch forces both drivers to bind an interface before creating PPP channel, so the system will not re-route the tunneling sockets to another interface when the original one is down. Another benefit is that now the host route is no longer required, so there is no need to remove it when PPP channel is closed. Signed-off-by: Chia-chi Yeh net: Avoid sleep-inside-spinlock in PPPoLAC and PPPoPNS. Since recv() and xmit() are called with a spinlock held, routines which might sleep cannot be used. This issue is solved by following changes: Incoming packets are now processed in backlog handler, recv_core(), instead of recv(). Since backlog handler is always executed with socket spinlock held, the requirement of ppp_input() is still satisfied. Outgoing packets are now processed in workqueue handler, xmit_core(), instead of xmit(). Note that kernel_sendmsg() is no longer used to prevent touching dead sockets. In release(), lock_sock() and pppox_unbind_sock() ensure that no thread is in recv_core() or xmit(). Then socket handlers are restored before release_sock(), so no packets will leak in backlog queue. Signed-off-by: Chia-chi Yeh net: Fix msg_iovlen in PPPoLAC and PPPoPNS. Although any positive value should work (which is always true in both drivers), the correct value should be 1. Signed-off-by: Chia-chi Yeh --- drivers/net/ppp/pppolac.c | 129 +++++++++++++++------------ drivers/net/ppp/pppopns.c | 177 +++++++++++++++++++++++--------------- include/linux/if_pppox.h | 3 + 3 files changed, 184 insertions(+), 125 deletions(-) diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c index 8843a9d30911..af3202a920a0 100644 --- a/drivers/net/ppp/pppolac.c +++ b/drivers/net/ppp/pppolac.c @@ -3,7 +3,6 @@ * Driver for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661) * * Copyright (C) 2009 Google, Inc. - * Author: Chia-chi Yeh * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -22,8 +21,10 @@ * only works on IPv4 due to the lack of UDP encapsulation support in IPv6. */ #include +#include #include #include +#include #include #include #include @@ -31,13 +32,14 @@ #include #include #include +#include -#define L2TP_CONTROL_MASK 0x80 -#define L2TP_VERSION_MASK 0x0F +#define L2TP_CONTROL_BIT 0x80 +#define L2TP_LENGTH_BIT 0x40 +#define L2TP_SEQUENCE_BIT 0x08 +#define L2TP_OFFSET_BIT 0x02 #define L2TP_VERSION 0x02 -#define L2TP_LENGTH_MASK 0x40 -#define L2TP_OFFSET_MASK 0x02 -#define L2TP_SEQUENCE_MASK 0x08 +#define L2TP_VERSION_MASK 0x0F #define PPP_ADDR 0xFF #define PPP_CTRL 0x03 @@ -51,10 +53,10 @@ static inline union unaligned *unaligned(void *ptr) return (union unaligned *)ptr; } -static int pppolac_recv(struct sock *sk_udp, struct sk_buff *skb) +static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb) { - struct sock *sk; - struct pppolac_opt *opt; + struct sock *sk = (struct sock *)sk_udp->sk_user_data; + struct pppolac_opt *opt = &pppox_sk(sk)->proto.lac; __u8 bits; __u8 *ptr; @@ -63,10 +65,10 @@ static int pppolac_recv(struct sock *sk_udp, struct sk_buff *skb) goto drop; /* Put it back if it is a control packet. */ - if (skb->data[sizeof(struct udphdr)] & L2TP_CONTROL_MASK) - return 1; + if (skb->data[sizeof(struct udphdr)] & L2TP_CONTROL_BIT) + return opt->backlog_rcv(sk_udp, skb); - /* Now the packet is ours. Skip UDP header. */ + /* Skip UDP header. */ skb_pull(skb, sizeof(struct udphdr)); /* Check the version. */ @@ -76,44 +78,30 @@ static int pppolac_recv(struct sock *sk_udp, struct sk_buff *skb) ptr = &skb->data[2]; /* Check the length if it is present. */ - if (bits & L2TP_LENGTH_MASK) { + if (bits & L2TP_LENGTH_BIT) { if ((ptr[0] << 8 | ptr[1]) != skb->len) goto drop; ptr += 2; } /* Skip all fields including optional ones. */ - if (!skb_pull(skb, 6 + (bits & L2TP_SEQUENCE_MASK ? 4 : 0) + - (bits & L2TP_LENGTH_MASK ? 2 : 0) + - (bits & L2TP_OFFSET_MASK ? 2 : 0))) + if (!skb_pull(skb, 6 + (bits & L2TP_SEQUENCE_BIT ? 4 : 0) + + (bits & L2TP_LENGTH_BIT ? 2 : 0) + + (bits & L2TP_OFFSET_BIT ? 2 : 0))) goto drop; /* Skip the offset padding if it is present. */ - if (bits & L2TP_OFFSET_MASK && + if (bits & L2TP_OFFSET_BIT && !skb_pull(skb, skb->data[-2] << 8 | skb->data[-1])) goto drop; - /* Now ptr is pointing to the tunnel and skb is pointing to the payload. - * We have to lock sk_udp to prevent sk from being closed. */ - lock_sock(sk_udp); - sk = sk_udp->sk_user_data; - if (!sk) { - release_sock(sk_udp); - goto drop; - } - sock_hold(sk); - release_sock(sk_udp); - opt = &pppox_sk(sk)->proto.lac; - /* Check the tunnel and the session. */ - if (unaligned(ptr)->u32 != opt->local) { - sock_put(sk); + if (unaligned(ptr)->u32 != opt->local) goto drop; - } - /* Check the sequence if it is present. According to RFC 2661 page 10 - * and 43, the only thing to do is updating opt->sequencing. */ - opt->sequencing = bits & L2TP_SEQUENCE_MASK; + /* Check the sequence if it is present. According to RFC 2661 section + * 5.4, the only thing to do is to update opt->sequencing. */ + opt->sequencing = bits & L2TP_SEQUENCE_BIT; /* Skip PPP address and control if they are present. */ if (skb->len >= 2 && skb->data[0] == PPP_ADDR && @@ -124,26 +112,50 @@ static int pppolac_recv(struct sock *sk_udp, struct sk_buff *skb) if (skb->len >= 1 && skb->data[0] & 1) skb_push(skb, 1)[0] = 0; - /* Finally, deliver the packet to PPP channel. We have to lock sk to - * prevent another thread from calling pppox_unbind_sock(). */ + /* Finally, deliver the packet to PPP channel. */ skb_orphan(skb); - lock_sock(sk); ppp_input(&pppox_sk(sk)->chan, skb); - release_sock(sk); - sock_put(sk); - return 0; - + return NET_RX_SUCCESS; drop: kfree_skb(skb); + return NET_RX_DROP; +} + +static int pppolac_recv(struct sock *sk_udp, struct sk_buff *skb) +{ + sock_hold(sk_udp); + sk_receive_skb(sk_udp, skb, 0); return 0; } +static struct sk_buff_head delivery_queue; + +static void pppolac_xmit_core(struct work_struct *delivery_work) +{ + mm_segment_t old_fs = get_fs(); + struct sk_buff *skb; + + set_fs(KERNEL_DS); + while ((skb = skb_dequeue(&delivery_queue))) { + struct sock *sk_udp = skb->sk; + struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len}; + struct msghdr msg = { + .msg_iov = (struct iovec *)&iov, + .msg_iovlen = 1, + .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT, + }; + sk_udp->sk_prot->sendmsg(NULL, sk_udp, &msg, skb->len); + kfree_skb(skb); + } + set_fs(old_fs); +} + +static DECLARE_WORK(delivery_work, pppolac_xmit_core); + static int pppolac_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk_udp = (struct sock *)chan->private; struct pppolac_opt *opt = &pppox_sk(sk_udp->sk_user_data)->proto.lac; - struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT}; - struct kvec iov; /* Install PPP address and control. */ skb_push(skb, 2); @@ -153,7 +165,7 @@ static int pppolac_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Install L2TP header. */ if (opt->sequencing) { skb_push(skb, 10); - skb->data[0] = L2TP_SEQUENCE_MASK; + skb->data[0] = L2TP_SEQUENCE_BIT; skb->data[6] = opt->sequence >> 8; skb->data[7] = opt->sequence; skb->data[8] = 0; @@ -166,11 +178,10 @@ static int pppolac_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb->data[1] = L2TP_VERSION; unaligned(&skb->data[2])->u32 = opt->remote; - /* Now send the packet via UDP socket. */ - iov.iov_base = skb->data; - iov.iov_len = skb->len; - kernel_sendmsg(sk_udp->sk_socket, &msg, &iov, 1, skb->len); - kfree_skb(skb); + /* Now send the packet via the delivery queue. */ + skb_set_owner_w(skb, sk_udp); + skb_queue_tail(&delivery_queue, skb); + schedule_work(&delivery_work); return 1; } @@ -220,6 +231,14 @@ static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr, error = -EBUSY; if (udp_sk(sk_udp)->encap_type || sk_udp->sk_user_data) goto out; + if (!sk_udp->sk_bound_dev_if) { + struct dst_entry *dst = sk_dst_get(sk_udp); + error = -ENODEV; + if (!dst) + goto out; + sk_udp->sk_bound_dev_if = dst->dev->ifindex; + dst_release(dst); + } po->chan.hdrlen = 12; po->chan.private = sk_udp; @@ -227,6 +246,7 @@ static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr, po->chan.mtu = PPP_MTU - 80; po->proto.lac.local = unaligned(&addr->local)->u32; po->proto.lac.remote = unaligned(&addr->remote)->u32; + po->proto.lac.backlog_rcv = sk_udp->sk_backlog_rcv; error = ppp_register_channel(&po->chan); if (error) @@ -235,8 +255,8 @@ static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr, sk->sk_state = PPPOX_CONNECTED; udp_sk(sk_udp)->encap_type = UDP_ENCAP_L2TPINUDP; udp_sk(sk_udp)->encap_rcv = pppolac_recv; + sk_udp->sk_backlog_rcv = pppolac_recv_core; sk_udp->sk_user_data = sk; - out: if (sock_udp) { release_sock(sk_udp); @@ -263,12 +283,11 @@ static int pppolac_release(struct socket *sock) if (sk->sk_state != PPPOX_NONE) { struct sock *sk_udp = (struct sock *)pppox_sk(sk)->chan.private; lock_sock(sk_udp); - pppox_unbind_sock(sk); - sk_udp->sk_user_data = NULL; udp_sk(sk_udp)->encap_type = 0; udp_sk(sk_udp)->encap_rcv = NULL; - + sk_udp->sk_backlog_rcv = pppox_sk(sk)->proto.lac.backlog_rcv; + sk_udp->sk_user_data = NULL; release_sock(sk_udp); sockfd_put(sk_udp->sk_socket); } @@ -342,6 +361,8 @@ static int __init pppolac_init(void) error = register_pppox_proto(PX_PROTO_OLAC, &pppolac_pppox_proto); if (error) proto_unregister(&pppolac_proto); + else + skb_queue_head_init(&delivery_queue); return error; } diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c index 8885eba8968e..298097127c90 100644 --- a/drivers/net/ppp/pppopns.c +++ b/drivers/net/ppp/pppopns.c @@ -3,7 +3,6 @@ * Driver for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637) * * Copyright (C) 2009 Google, Inc. - * Author: Chia-chi Yeh * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -22,20 +21,24 @@ * and IPv6. */ #include +#include #include #include +#include #include #include #include #include #include #include +#include #define GRE_HEADER_SIZE 8 -#define PPTP_GRE_MASK htons(0x2001) -#define PPTP_GRE_SEQ_MASK htons(0x1000) -#define PPTP_GRE_ACK_MASK htons(0x0080) +#define PPTP_GRE_BITS htons(0x2001) +#define PPTP_GRE_BITS_MASK htons(0xEF7F) +#define PPTP_GRE_SEQ_BIT htons(0x1000) +#define PPTP_GRE_ACK_BIT htons(0x0080) #define PPTP_GRE_TYPE htons(0x880B) #define PPP_ADDR 0xFF @@ -49,76 +52,90 @@ struct header { __u32 sequence; } __attribute__((packed)); -static void pppopns_recv(struct sock *sk_raw, int length) +static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb) { - struct sock *sk; - struct pppopns_opt *opt; - struct sk_buff *skb; + struct sock *sk = (struct sock *)sk_raw->sk_user_data; + struct pppopns_opt *opt = &pppox_sk(sk)->proto.pns; struct header *hdr; - /* Lock sk_raw to prevent sk from being closed. */ - lock_sock(sk_raw); - sk = (struct sock *)sk_raw->sk_user_data; - if (!sk) { - release_sock(sk_raw); - return; - } - sock_hold(sk); - release_sock(sk_raw); - opt = &pppox_sk(sk)->proto.pns; + /* Skip transport header */ + skb_pull(skb, skb_transport_header(skb) - skb->data); + + /* Drop the packet if it is too short. */ + if (skb->len < GRE_HEADER_SIZE) + goto drop; + + /* Check the header. */ + hdr = (struct header *)skb->data; + if (hdr->type != PPTP_GRE_TYPE || hdr->call != opt->local || + (hdr->bits & PPTP_GRE_BITS_MASK) != PPTP_GRE_BITS) + goto drop; + + /* Skip all fields including optional ones. */ + if (!skb_pull(skb, GRE_HEADER_SIZE + + (hdr->bits & PPTP_GRE_SEQ_BIT ? 4 : 0) + + (hdr->bits & PPTP_GRE_ACK_BIT ? 4 : 0))) + goto drop; + + /* Check the length. */ + if (skb->len != ntohs(hdr->length)) + goto drop; + + /* Skip PPP address and control if they are present. */ + if (skb->len >= 2 && skb->data[0] == PPP_ADDR && + skb->data[1] == PPP_CTRL) + skb_pull(skb, 2); + + /* Fix PPP protocol if it is compressed. */ + if (skb->len >= 1 && skb->data[0] & 1) + skb_push(skb, 1)[0] = 0; + + /* Finally, deliver the packet to PPP channel. */ + skb_orphan(skb); + ppp_input(&pppox_sk(sk)->chan, skb); + return NET_RX_SUCCESS; +drop: + kfree_skb(skb); + return NET_RX_DROP; +} - /* Process packets from the receive queue. */ +static void pppopns_recv(struct sock *sk_raw, int length) +{ + struct sk_buff *skb; while ((skb = skb_dequeue(&sk_raw->sk_receive_queue))) { - skb_pull(skb, skb_transport_header(skb) - skb->data); - - /* Drop the packet if it is too short. */ - if (skb->len < GRE_HEADER_SIZE) - goto drop; - - /* Check the header. */ - hdr = (struct header *)skb->data; - if (hdr->type != PPTP_GRE_TYPE || hdr->call != opt->local || - (hdr->bits & PPTP_GRE_MASK) != PPTP_GRE_MASK) - goto drop; - - /* Skip all fields including optional ones. */ - if (!skb_pull(skb, GRE_HEADER_SIZE + - (hdr->bits & PPTP_GRE_SEQ_MASK ? 4 : 0) + - (hdr->bits & PPTP_GRE_ACK_MASK ? 4 : 0))) - goto drop; - - /* Check the length. */ - if (skb->len != ntohs(hdr->length)) - goto drop; - - /* Skip PPP address and control if they are present. */ - if (skb->len >= 2 && skb->data[0] == PPP_ADDR && - skb->data[1] == PPP_CTRL) - skb_pull(skb, 2); - - /* Fix PPP protocol if it is compressed. */ - if (skb->len >= 1 && skb->data[0] & 1) - skb_push(skb, 1)[0] = 0; - - /* Deliver the packet to PPP channel. We have to lock sk to - * prevent another thread from calling pppox_unbind_sock(). */ - skb_orphan(skb); - lock_sock(sk); - ppp_input(&pppox_sk(sk)->chan, skb); - release_sock(sk); - continue; -drop: + sock_hold(sk_raw); + sk_receive_skb(sk_raw, skb, 0); + } +} + +static struct sk_buff_head delivery_queue; + +static void pppopns_xmit_core(struct work_struct *delivery_work) +{ + mm_segment_t old_fs = get_fs(); + struct sk_buff *skb; + + set_fs(KERNEL_DS); + while ((skb = skb_dequeue(&delivery_queue))) { + struct sock *sk_raw = skb->sk; + struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len}; + struct msghdr msg = { + .msg_iov = (struct iovec *)&iov, + .msg_iovlen = 1, + .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT, + }; + sk_raw->sk_prot->sendmsg(NULL, sk_raw, &msg, skb->len); kfree_skb(skb); } - sock_put(sk); + set_fs(old_fs); } +static DECLARE_WORK(delivery_work, pppopns_xmit_core); + static int pppopns_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk_raw = (struct sock *)chan->private; struct pppopns_opt *opt = &pppox_sk(sk_raw->sk_user_data)->proto.pns; - struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT}; - struct kvec iov; struct header *hdr; __u16 length; @@ -130,18 +147,17 @@ static int pppopns_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Install PPTP GRE header. */ hdr = (struct header *)skb_push(skb, 12); - hdr->bits = PPTP_GRE_MASK | PPTP_GRE_SEQ_MASK; + hdr->bits = PPTP_GRE_BITS | PPTP_GRE_SEQ_BIT; hdr->type = PPTP_GRE_TYPE; hdr->length = htons(length); hdr->call = opt->remote; hdr->sequence = htonl(opt->sequence); opt->sequence++; - /* Now send the packet via RAW socket. */ - iov.iov_base = skb->data; - iov.iov_len = skb->len; - kernel_sendmsg(sk_raw->sk_socket, &msg, &iov, 1, skb->len); - kfree_skb(skb); + /* Now send the packet via the delivery queue. */ + skb_set_owner_w(skb, sk_raw); + skb_queue_tail(&delivery_queue, skb); + schedule_work(&delivery_work); return 1; } @@ -160,6 +176,7 @@ static int pppopns_connect(struct socket *sock, struct sockaddr *useraddr, struct sockaddr_storage ss; struct socket *sock_tcp = NULL; struct socket *sock_raw = NULL; + struct sock *sk_tcp; struct sock *sk_raw; int error; @@ -174,21 +191,31 @@ static int pppopns_connect(struct socket *sock, struct sockaddr *useraddr, sock_tcp = sockfd_lookup(addr->tcp_socket, &error); if (!sock_tcp) goto out; + sk_tcp = sock_tcp->sk; error = -EPROTONOSUPPORT; - if (sock_tcp->sk->sk_protocol != IPPROTO_TCP) + if (sk_tcp->sk_protocol != IPPROTO_TCP) goto out; addrlen = sizeof(struct sockaddr_storage); error = kernel_getpeername(sock_tcp, (struct sockaddr *)&ss, &addrlen); if (error) goto out; + if (!sk_tcp->sk_bound_dev_if) { + struct dst_entry *dst = sk_dst_get(sk_tcp); + error = -ENODEV; + if (!dst) + goto out; + sk_tcp->sk_bound_dev_if = dst->dev->ifindex; + dst_release(dst); + } error = sock_create(ss.ss_family, SOCK_RAW, IPPROTO_GRE, &sock_raw); if (error) goto out; + sk_raw = sock_raw->sk; + sk_raw->sk_bound_dev_if = sk_tcp->sk_bound_dev_if; error = kernel_connect(sock_raw, (struct sockaddr *)&ss, addrlen, 0); if (error) goto out; - sk_raw = sock_raw->sk; po->chan.hdrlen = 14; po->chan.private = sk_raw; @@ -196,15 +223,19 @@ static int pppopns_connect(struct socket *sock, struct sockaddr *useraddr, po->chan.mtu = PPP_MTU - 80; po->proto.pns.local = addr->local; po->proto.pns.remote = addr->remote; + po->proto.pns.data_ready = sk_raw->sk_data_ready; + po->proto.pns.backlog_rcv = sk_raw->sk_backlog_rcv; error = ppp_register_channel(&po->chan); if (error) goto out; sk->sk_state = PPPOX_CONNECTED; - sk_raw->sk_user_data = sk; + lock_sock(sk_raw); sk_raw->sk_data_ready = pppopns_recv; - + sk_raw->sk_backlog_rcv = pppopns_recv_core; + sk_raw->sk_user_data = sk; + release_sock(sk_raw); out: if (sock_tcp) sockfd_put(sock_tcp); @@ -231,6 +262,8 @@ static int pppopns_release(struct socket *sock) struct sock *sk_raw = (struct sock *)pppox_sk(sk)->chan.private; lock_sock(sk_raw); pppox_unbind_sock(sk); + sk_raw->sk_data_ready = pppox_sk(sk)->proto.pns.data_ready; + sk_raw->sk_backlog_rcv = pppox_sk(sk)->proto.pns.backlog_rcv; sk_raw->sk_user_data = NULL; release_sock(sk_raw); sock_release(sk_raw->sk_socket); @@ -305,6 +338,8 @@ static int __init pppopns_init(void) error = register_pppox_proto(PX_PROTO_OPNS, &pppopns_pppox_proto); if (error) proto_unregister(&pppopns_proto); + else + skb_queue_head_init(&delivery_queue); return error; } diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 2ac23c30bcfb..c9e95ae5695e 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -49,12 +49,15 @@ struct pppolac_opt { __u32 remote; __u16 sequence; __u8 sequencing; + int (*backlog_rcv)(struct sock *sk_udp, struct sk_buff *skb); }; struct pppopns_opt { __u16 local; __u16 remote; __u32 sequence; + void (*data_ready)(struct sock *sk_raw, int length); + int (*backlog_rcv)(struct sock *sk_raw, struct sk_buff *skb); }; #include -- GitLab From f6cd37560ee64c37f40a78fd09cf0271eb65d654 Mon Sep 17 00:00:00 2001 From: Chia-chi Yeh Date: Fri, 15 Apr 2011 15:22:09 -0700 Subject: [PATCH 0631/1442] ANDROID: net: Reorder incoming packets in PPPoLAC and PPPoPNS. PPP handles packet loss but does not work with out of order packets. This change performs reordering of incoming data packets within a sliding window of one second. Since sequence number is optional, receiving a packet without it will drop all queued packets. Currently the logic is triggered by incoming packets, so queued packets have to wait till another packet is arrived. It is done for simplicity since no additional locks or threads are required. For reliable protocols, a retransmission will kick it. For unreliable protocols, queued packets just seem like packet loss. Time-critical protocols might be broken, but they never work with queueing anyway. Signed-off-by: Chia-chi Yeh --- drivers/net/ppp/pppolac.c | 95 +++++++++++++++++++++++++++++++++------ drivers/net/ppp/pppopns.c | 87 +++++++++++++++++++++++++++++++---- include/linux/if_pppox.h | 22 ++++----- 3 files changed, 173 insertions(+), 31 deletions(-) diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c index af3202a920a0..c94b8507d92b 100644 --- a/drivers/net/ppp/pppolac.c +++ b/drivers/net/ppp/pppolac.c @@ -15,12 +15,15 @@ */ /* This driver handles L2TP data packets between a UDP socket and a PPP channel. - * To keep things simple, only one session per socket is permitted. Packets are - * sent via the socket, so it must keep connected to the same address. One must - * not set sequencing in ICCN but let LNS controll it. Currently this driver - * only works on IPv4 due to the lack of UDP encapsulation support in IPv6. */ + * The socket must keep connected, and only one session per socket is permitted. + * Sequencing of outgoing packets is controlled by LNS. Incoming packets with + * sequences are reordered within a sliding window of one second. Currently + * reordering only happens when a packet is received. It is done for simplicity + * since no additional locks or threads are required. This driver only works on + * IPv4 due to the lack of UDP encapsulation support in IPv6. */ #include +#include #include #include #include @@ -53,14 +56,28 @@ static inline union unaligned *unaligned(void *ptr) return (union unaligned *)ptr; } +struct meta { + __u32 sequence; + __u32 timestamp; +}; + +static inline struct meta *skb_meta(struct sk_buff *skb) +{ + return (struct meta *)skb->cb; +} + +/******************************************************************************/ + static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb) { struct sock *sk = (struct sock *)sk_udp->sk_user_data; struct pppolac_opt *opt = &pppox_sk(sk)->proto.lac; + struct meta *meta = skb_meta(skb); + __u32 now = jiffies; __u8 bits; __u8 *ptr; - /* Drop the packet if it is too short. */ + /* Drop the packet if L2TP header is missing. */ if (skb->len < sizeof(struct udphdr) + 6) goto drop; @@ -99,9 +116,12 @@ static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb) if (unaligned(ptr)->u32 != opt->local) goto drop; - /* Check the sequence if it is present. According to RFC 2661 section - * 5.4, the only thing to do is to update opt->sequencing. */ - opt->sequencing = bits & L2TP_SEQUENCE_BIT; + /* Check the sequence if it is present. */ + if (bits & L2TP_SEQUENCE_BIT) { + meta->sequence = ptr[4] << 8 | ptr[5]; + if ((__s16)(meta->sequence - opt->recv_sequence) < 0) + goto drop; + } /* Skip PPP address and control if they are present. */ if (skb->len >= 2 && skb->data[0] == PPP_ADDR && @@ -112,7 +132,54 @@ static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb) if (skb->len >= 1 && skb->data[0] & 1) skb_push(skb, 1)[0] = 0; - /* Finally, deliver the packet to PPP channel. */ + /* Drop the packet if PPP protocol is missing. */ + if (skb->len < 2) + goto drop; + + /* Perform reordering if sequencing is enabled. */ + atomic_set(&opt->sequencing, bits & L2TP_SEQUENCE_BIT); + if (bits & L2TP_SEQUENCE_BIT) { + struct sk_buff *skb1; + + /* Insert the packet into receive queue in order. */ + skb_set_owner_r(skb, sk); + skb_queue_walk(&sk->sk_receive_queue, skb1) { + struct meta *meta1 = skb_meta(skb1); + __s16 order = meta->sequence - meta1->sequence; + if (order == 0) + goto drop; + if (order < 0) { + meta->timestamp = meta1->timestamp; + skb_insert(skb1, skb, &sk->sk_receive_queue); + skb = NULL; + break; + } + } + if (skb) { + meta->timestamp = now; + skb_queue_tail(&sk->sk_receive_queue, skb); + } + + /* Remove packets from receive queue as long as + * 1. the receive buffer is full, + * 2. they are queued longer than one second, or + * 3. there are no missing packets before them. */ + skb_queue_walk_safe(&sk->sk_receive_queue, skb, skb1) { + meta = skb_meta(skb); + if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + now - meta->timestamp < HZ && + meta->sequence != opt->recv_sequence) + break; + skb_unlink(skb, &sk->sk_receive_queue); + opt->recv_sequence = (__u16)(meta->sequence + 1); + skb_orphan(skb); + ppp_input(&pppox_sk(sk)->chan, skb); + } + return NET_RX_SUCCESS; + } + + /* Flush receive queue if sequencing is disabled. */ + skb_queue_purge(&sk->sk_receive_queue); skb_orphan(skb); ppp_input(&pppox_sk(sk)->chan, skb); return NET_RX_SUCCESS; @@ -163,14 +230,14 @@ static int pppolac_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb->data[1] = PPP_CTRL; /* Install L2TP header. */ - if (opt->sequencing) { + if (atomic_read(&opt->sequencing)) { skb_push(skb, 10); skb->data[0] = L2TP_SEQUENCE_BIT; - skb->data[6] = opt->sequence >> 8; - skb->data[7] = opt->sequence; + skb->data[6] = opt->xmit_sequence >> 8; + skb->data[7] = opt->xmit_sequence; skb->data[8] = 0; skb->data[9] = 0; - opt->sequence++; + opt->xmit_sequence++; } else { skb_push(skb, 6); skb->data[0] = 0; @@ -246,6 +313,7 @@ static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr, po->chan.mtu = PPP_MTU - 80; po->proto.lac.local = unaligned(&addr->local)->u32; po->proto.lac.remote = unaligned(&addr->remote)->u32; + atomic_set(&po->proto.lac.sequencing, 1); po->proto.lac.backlog_rcv = sk_udp->sk_backlog_rcv; error = ppp_register_channel(&po->chan); @@ -283,6 +351,7 @@ static int pppolac_release(struct socket *sock) if (sk->sk_state != PPPOX_NONE) { struct sock *sk_udp = (struct sock *)pppox_sk(sk)->chan.private; lock_sock(sk_udp); + skb_queue_purge(&sk->sk_receive_queue); pppox_unbind_sock(sk); udp_sk(sk_udp)->encap_type = 0; udp_sk(sk_udp)->encap_rcv = NULL; diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c index 298097127c90..fb8198447938 100644 --- a/drivers/net/ppp/pppopns.c +++ b/drivers/net/ppp/pppopns.c @@ -16,11 +16,14 @@ /* This driver handles PPTP data packets between a RAW socket and a PPP channel. * The socket is created in the kernel space and connected to the same address - * of the control socket. To keep things simple, packets are always sent with - * sequence but without acknowledgement. This driver should work on both IPv4 - * and IPv6. */ + * of the control socket. Outgoing packets are always sent with sequences but + * without acknowledgements. Incoming packets with sequences are reordered + * within a sliding window of one second. Currently reordering only happens when + * a packet is received. It is done for simplicity since no additional locks or + * threads are required. This driver should work on both IPv4 and IPv6. */ #include +#include #include #include #include @@ -52,21 +55,35 @@ struct header { __u32 sequence; } __attribute__((packed)); +struct meta { + __u32 sequence; + __u32 timestamp; +}; + +static inline struct meta *skb_meta(struct sk_buff *skb) +{ + return (struct meta *)skb->cb; +} + +/******************************************************************************/ + static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb) { struct sock *sk = (struct sock *)sk_raw->sk_user_data; struct pppopns_opt *opt = &pppox_sk(sk)->proto.pns; + struct meta *meta = skb_meta(skb); + __u32 now = jiffies; struct header *hdr; /* Skip transport header */ skb_pull(skb, skb_transport_header(skb) - skb->data); - /* Drop the packet if it is too short. */ + /* Drop the packet if GRE header is missing. */ if (skb->len < GRE_HEADER_SIZE) goto drop; + hdr = (struct header *)skb->data; /* Check the header. */ - hdr = (struct header *)skb->data; if (hdr->type != PPTP_GRE_TYPE || hdr->call != opt->local || (hdr->bits & PPTP_GRE_BITS_MASK) != PPTP_GRE_BITS) goto drop; @@ -81,6 +98,13 @@ static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb) if (skb->len != ntohs(hdr->length)) goto drop; + /* Check the sequence if it is present. */ + if (hdr->bits & PPTP_GRE_SEQ_BIT) { + meta->sequence = ntohl(hdr->sequence); + if ((__s32)(meta->sequence - opt->recv_sequence) < 0) + goto drop; + } + /* Skip PPP address and control if they are present. */ if (skb->len >= 2 && skb->data[0] == PPP_ADDR && skb->data[1] == PPP_CTRL) @@ -90,7 +114,53 @@ static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb) if (skb->len >= 1 && skb->data[0] & 1) skb_push(skb, 1)[0] = 0; - /* Finally, deliver the packet to PPP channel. */ + /* Drop the packet if PPP protocol is missing. */ + if (skb->len < 2) + goto drop; + + /* Perform reordering if sequencing is enabled. */ + if (hdr->bits & PPTP_GRE_SEQ_BIT) { + struct sk_buff *skb1; + + /* Insert the packet into receive queue in order. */ + skb_set_owner_r(skb, sk); + skb_queue_walk(&sk->sk_receive_queue, skb1) { + struct meta *meta1 = skb_meta(skb1); + __s32 order = meta->sequence - meta1->sequence; + if (order == 0) + goto drop; + if (order < 0) { + meta->timestamp = meta1->timestamp; + skb_insert(skb1, skb, &sk->sk_receive_queue); + skb = NULL; + break; + } + } + if (skb) { + meta->timestamp = now; + skb_queue_tail(&sk->sk_receive_queue, skb); + } + + /* Remove packets from receive queue as long as + * 1. the receive buffer is full, + * 2. they are queued longer than one second, or + * 3. there are no missing packets before them. */ + skb_queue_walk_safe(&sk->sk_receive_queue, skb, skb1) { + meta = skb_meta(skb); + if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + now - meta->timestamp < HZ && + meta->sequence != opt->recv_sequence) + break; + skb_unlink(skb, &sk->sk_receive_queue); + opt->recv_sequence = meta->sequence + 1; + skb_orphan(skb); + ppp_input(&pppox_sk(sk)->chan, skb); + } + return NET_RX_SUCCESS; + } + + /* Flush receive queue if sequencing is disabled. */ + skb_queue_purge(&sk->sk_receive_queue); skb_orphan(skb); ppp_input(&pppox_sk(sk)->chan, skb); return NET_RX_SUCCESS; @@ -151,8 +221,8 @@ static int pppopns_xmit(struct ppp_channel *chan, struct sk_buff *skb) hdr->type = PPTP_GRE_TYPE; hdr->length = htons(length); hdr->call = opt->remote; - hdr->sequence = htonl(opt->sequence); - opt->sequence++; + hdr->sequence = htonl(opt->xmit_sequence); + opt->xmit_sequence++; /* Now send the packet via the delivery queue. */ skb_set_owner_w(skb, sk_raw); @@ -261,6 +331,7 @@ static int pppopns_release(struct socket *sock) if (sk->sk_state != PPPOX_NONE) { struct sock *sk_raw = (struct sock *)pppox_sk(sk)->chan.private; lock_sock(sk_raw); + skb_queue_purge(&sk->sk_receive_queue); pppox_unbind_sock(sk); sk_raw->sk_data_ready = pppox_sk(sk)->proto.pns.data_ready; sk_raw->sk_backlog_rcv = pppox_sk(sk)->proto.pns.backlog_rcv; diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index c9e95ae5695e..960ad12d042b 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -45,19 +45,21 @@ struct pptp_opt { }; struct pppolac_opt { - __u32 local; - __u32 remote; - __u16 sequence; - __u8 sequencing; - int (*backlog_rcv)(struct sock *sk_udp, struct sk_buff *skb); + __u32 local; + __u32 remote; + __u32 recv_sequence; + __u32 xmit_sequence; + atomic_t sequencing; + int (*backlog_rcv)(struct sock *sk_udp, struct sk_buff *skb); }; struct pppopns_opt { - __u16 local; - __u16 remote; - __u32 sequence; - void (*data_ready)(struct sock *sk_raw, int length); - int (*backlog_rcv)(struct sock *sk_raw, struct sk_buff *skb); + __u16 local; + __u16 remote; + __u32 recv_sequence; + __u32 xmit_sequence; + void (*data_ready)(struct sock *sk_raw, int length); + int (*backlog_rcv)(struct sock *sk_raw, struct sk_buff *skb); }; #include -- GitLab From bda61e596aee5320ea451cf7098f2d38a1bdd331 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Thu, 20 Sep 2012 16:34:10 -0700 Subject: [PATCH 0632/1442] ANDROID: net: PPPoPNS and PPPoLAC update to use PPP_MRU instead of PPP_MRU Some headers files were moved around and some defines renamed. Signed-off-by: JP Abgrall --- drivers/net/ppp/pppolac.c | 2 +- drivers/net/ppp/pppopns.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c index c94b8507d92b..a5d3d634fd9a 100644 --- a/drivers/net/ppp/pppolac.c +++ b/drivers/net/ppp/pppolac.c @@ -310,7 +310,7 @@ static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr, po->chan.hdrlen = 12; po->chan.private = sk_udp; po->chan.ops = &pppolac_channel_ops; - po->chan.mtu = PPP_MTU - 80; + po->chan.mtu = PPP_MRU - 80; po->proto.lac.local = unaligned(&addr->local)->u32; po->proto.lac.remote = unaligned(&addr->remote)->u32; atomic_set(&po->proto.lac.sequencing, 1); diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c index fb8198447938..6016d29c0660 100644 --- a/drivers/net/ppp/pppopns.c +++ b/drivers/net/ppp/pppopns.c @@ -290,7 +290,7 @@ static int pppopns_connect(struct socket *sock, struct sockaddr *useraddr, po->chan.hdrlen = 14; po->chan.private = sk_raw; po->chan.ops = &pppopns_channel_ops; - po->chan.mtu = PPP_MTU - 80; + po->chan.mtu = PPP_MRU - 80; po->proto.pns.local = addr->local; po->proto.pns.remote = addr->remote; po->proto.pns.data_ready = sk_raw->sk_data_ready; -- GitLab From 442a72bdfbc7c3957f91c9d9054ef1da8acbc3fb Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 23 Mar 2012 21:48:02 +0400 Subject: [PATCH 0633/1442] ANDROID: Include if_pppolac.h and if_pppopns.h into header-y target This is required to pass the headers_check Change-Id: Ic4c773973278cbdf1cb4eb66473826cb96ccbfb3 Signed-off-by: Andrey Konovalov --- include/linux/Kbuild | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 include/linux/Kbuild diff --git a/include/linux/Kbuild b/include/linux/Kbuild new file mode 100644 index 000000000000..a4608897a5f3 --- /dev/null +++ b/include/linux/Kbuild @@ -0,0 +1,2 @@ +header-y += if_pppolac.h +header-y += if_pppopns.h -- GitLab From 19205d2ca0a207404ee2dc526fd2bbb99fb6faf0 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 7 Nov 2013 13:19:34 -0800 Subject: [PATCH 0634/1442] ANDROID: net: move PPPoLAC and PPPoPNS headers to uapi Move the entire contents of the linux/if_pppolac.h and linux/if_pppopns.h headers to uapi, they only contain userspace interfaces. Change-Id: I3cfed7f2ae400b53269a1f59144aa3dbc30ae0b5 Signed-off-by: Colin Cross --- include/linux/if_pppolac.h | 12 +----------- include/linux/if_pppopns.h | 11 +---------- include/uapi/linux/if_pppolac.h | 33 +++++++++++++++++++++++++++++++++ include/uapi/linux/if_pppopns.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 21 deletions(-) create mode 100644 include/uapi/linux/if_pppolac.h create mode 100644 include/uapi/linux/if_pppopns.h diff --git a/include/linux/if_pppolac.h b/include/linux/if_pppolac.h index c06bd6c8ba26..e40aa1075a30 100644 --- a/include/linux/if_pppolac.h +++ b/include/linux/if_pppolac.h @@ -18,16 +18,6 @@ #ifndef __LINUX_IF_PPPOLAC_H #define __LINUX_IF_PPPOLAC_H -#include -#include - -struct sockaddr_pppolac { - sa_family_t sa_family; /* AF_PPPOX */ - unsigned int sa_protocol; /* PX_PROTO_OLAC */ - int udp_socket; - struct __attribute__((packed)) { - __u16 tunnel, session; - } local, remote; -} __attribute__((packed)); +#include #endif /* __LINUX_IF_PPPOLAC_H */ diff --git a/include/linux/if_pppopns.h b/include/linux/if_pppopns.h index 0cf34b4d551f..4ac621a9ce7c 100644 --- a/include/linux/if_pppopns.h +++ b/include/linux/if_pppopns.h @@ -18,15 +18,6 @@ #ifndef __LINUX_IF_PPPOPNS_H #define __LINUX_IF_PPPOPNS_H -#include -#include - -struct sockaddr_pppopns { - sa_family_t sa_family; /* AF_PPPOX */ - unsigned int sa_protocol; /* PX_PROTO_OPNS */ - int tcp_socket; - __u16 local; - __u16 remote; -} __attribute__((packed)); +#include #endif /* __LINUX_IF_PPPOPNS_H */ diff --git a/include/uapi/linux/if_pppolac.h b/include/uapi/linux/if_pppolac.h new file mode 100644 index 000000000000..b7eb8153ef66 --- /dev/null +++ b/include/uapi/linux/if_pppolac.h @@ -0,0 +1,33 @@ +/* include/uapi/linux/if_pppolac.h + * + * Header for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661) + * + * Copyright (C) 2009 Google, Inc. + * Author: Chia-chi Yeh + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _UAPI_LINUX_IF_PPPOLAC_H +#define _UAPI_LINUX_IF_PPPOLAC_H + +#include +#include + +struct sockaddr_pppolac { + sa_family_t sa_family; /* AF_PPPOX */ + unsigned int sa_protocol; /* PX_PROTO_OLAC */ + int udp_socket; + struct __attribute__((packed)) { + __u16 tunnel, session; + } local, remote; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_IF_PPPOLAC_H */ diff --git a/include/uapi/linux/if_pppopns.h b/include/uapi/linux/if_pppopns.h new file mode 100644 index 000000000000..a392b52ea6ec --- /dev/null +++ b/include/uapi/linux/if_pppopns.h @@ -0,0 +1,32 @@ +/* include/uapi/linux/if_pppopns.h + * + * Header for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637) + * + * Copyright (C) 2009 Google, Inc. + * Author: Chia-chi Yeh + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _UAPI_LINUX_IF_PPPOPNS_H +#define _UAPI_LINUX_IF_PPPOPNS_H + +#include +#include + +struct sockaddr_pppopns { + sa_family_t sa_family; /* AF_PPPOX */ + unsigned int sa_protocol; /* PX_PROTO_OPNS */ + int tcp_socket; + __u16 local; + __u16 remote; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_IF_PPPOPNS_H */ -- GitLab From 30cd6394b8e1ae6f751b746ea24779fa812db170 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Wed, 19 Aug 2015 13:43:16 +0100 Subject: [PATCH 0635/1442] ANDROID: net: PPPoPNS: Remove length argument from data_ready The argument was removed by commit 676d23690fb6 ("net: Fix use after free by removing length arg from sk_data_ready callbacks") and it's presence causes warnings like: drivers/net/ppp/pppopns.c:296:27: warning: assignment from incompatible pointer type po->proto.pns.data_ready = sk_raw->sk_data_ready; Signed-off-by: Jon Medhurst Signed-off-by: Amit Pundir --- drivers/net/ppp/pppopns.c | 2 +- include/linux/if_pppox.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c index 6016d29c0660..dc15f978c922 100644 --- a/drivers/net/ppp/pppopns.c +++ b/drivers/net/ppp/pppopns.c @@ -169,7 +169,7 @@ static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb) return NET_RX_DROP; } -static void pppopns_recv(struct sock *sk_raw, int length) +static void pppopns_recv(struct sock *sk_raw) { struct sk_buff *skb; while ((skb = skb_dequeue(&sk_raw->sk_receive_queue))) { diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 960ad12d042b..325727a7096a 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -58,7 +58,7 @@ struct pppopns_opt { __u16 remote; __u32 recv_sequence; __u32 xmit_sequence; - void (*data_ready)(struct sock *sk_raw, int length); + void (*data_ready)(struct sock *sk_raw); int (*backlog_rcv)(struct sock *sk_raw, struct sk_buff *skb); }; -- GitLab From 85d63881d53c7436964c5badda9fa4296a3c3e64 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 8 Dec 2015 18:26:39 +0530 Subject: [PATCH 0636/1442] ANDROID: net: pppopns: pppolac: fix sendmsg function calls Fix couple of sendmsg() calls to align with changes from upstream commit 1b784140474e "net: Remove iocb argument from sendmsg and recvmsg". Change-Id: I85bc46130af8decfa37abe65aec33053ed39f1b1 Signed-off-by: Amit Pundir Signed-off-by: Dmitry Shmidt --- drivers/net/ppp/pppolac.c | 2 +- drivers/net/ppp/pppopns.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c index a5d3d634fd9a..a178c863f7cf 100644 --- a/drivers/net/ppp/pppolac.c +++ b/drivers/net/ppp/pppolac.c @@ -211,7 +211,7 @@ static void pppolac_xmit_core(struct work_struct *delivery_work) .msg_iovlen = 1, .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT, }; - sk_udp->sk_prot->sendmsg(NULL, sk_udp, &msg, skb->len); + sk_udp->sk_prot->sendmsg(sk_udp, &msg, skb->len); kfree_skb(skb); } set_fs(old_fs); diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c index dc15f978c922..55f485a39b88 100644 --- a/drivers/net/ppp/pppopns.c +++ b/drivers/net/ppp/pppopns.c @@ -194,7 +194,7 @@ static void pppopns_xmit_core(struct work_struct *delivery_work) .msg_iovlen = 1, .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT, }; - sk_raw->sk_prot->sendmsg(NULL, sk_raw, &msg, skb->len); + sk_raw->sk_prot->sendmsg(sk_raw, &msg, skb->len); kfree_skb(skb); } set_fs(old_fs); -- GitLab From 028c8d14e9a4d10989dc464bee036b83fdc451b3 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 8 Dec 2015 18:28:40 +0530 Subject: [PATCH 0637/1442] ANDROID: Hack: net: PPPoPNS and PPPoLAC build fixes for 4.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream commit c0371da6047a "put iov_iter into msghdr", added iov_iter and removed direct access to scatter/gather array elements in msghdr. It broke PPPoLAC and PPPoPNS. Lets restore the direct access to scatter/gather array in msghdr for the time being. Otherwise we run into following build failure: ---------- drivers/net/ppp/pppolac.c: In function ‘pppolac_xmit_core’: drivers/net/ppp/pppolac.c:210:4: error: unknown field ‘msg_iov’ specified in initializer .msg_iov = (struct iovec *)&iov, ^ drivers/net/ppp/pppolac.c:211:4: error: unknown field ‘msg_iovlen’ specified in initializer .msg_iovlen = 1, ^ make[3]: *** [drivers/net/ppp/pppolac.o] Error 1 ---------- Change-Id: I2a1245a156da6d93b49f5cfd10506381b0eff005 Signed-off-by: Amit Pundir Signed-off-by: Dmitry Shmidt --- include/linux/socket.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/socket.h b/include/linux/socket.h index b5cc5a6d7011..09524774ea3f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -47,6 +47,10 @@ struct linger { struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ +#if defined(CONFIG_PPPOLAC) || defined(CONFIG_PPPOPNS) + struct iovec *msg_iov; /* scatter/gather array */ + __kernel_size_t msg_iovlen; /* # elements in msg_iov */ +#endif struct iov_iter msg_iter; /* data */ void *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ -- GitLab From a7e707ed5c30c636692e8daecd2810065dbf7223 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 8 Dec 2015 12:47:01 +0530 Subject: [PATCH 0638/1442] ANDROID: net: PPPoPNS and PPPoLAC build fixes for 4.4 Fix couple of sk_alloc() calls to align with mainline commit 11aa9c28b420 "net: Pass kern from net_proto_family.create to sk_alloc". Signed-off-by: Amit Pundir --- drivers/net/ppp/pppolac.c | 4 ++-- drivers/net/ppp/pppopns.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c index a178c863f7cf..1b8180cc1d4d 100644 --- a/drivers/net/ppp/pppolac.c +++ b/drivers/net/ppp/pppolac.c @@ -396,11 +396,11 @@ static struct proto_ops pppolac_proto_ops = { .mmap = sock_no_mmap, }; -static int pppolac_create(struct net *net, struct socket *sock) +static int pppolac_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppolac_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppolac_proto, kern); if (!sk) return -ENOMEM; diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c index 55f485a39b88..568bb45cfeac 100644 --- a/drivers/net/ppp/pppopns.c +++ b/drivers/net/ppp/pppopns.c @@ -375,11 +375,11 @@ static struct proto_ops pppopns_proto_ops = { .mmap = sock_no_mmap, }; -static int pppopns_create(struct net *net, struct socket *sock) +static int pppopns_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppopns_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppopns_proto, kern); if (!sk) return -ENOMEM; -- GitLab From baf0db430a512725c30ad2a6a41c2e0a44860a97 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Mon, 20 Jun 2011 12:41:46 -0700 Subject: [PATCH 0639/1442] ANDROID: netfilter: add xt_qtaguid matching module This module allows tracking stats at the socket level for given UIDs. It replaces xt_owner. If the --uid-owner is not specified, it will just count stats based on who the skb belongs to. This will even happen on incoming skbs as it looks into the skb via xt_socket magic to see who owns it. If an skb is lost, it will be assigned to uid=0. To control what sockets of what UIDs are tagged by what, one uses: echo t $sock_fd $accounting_tag $the_billed_uid \ > /proc/net/xt_qtaguid/ctrl So whenever an skb belongs to a sock_fd, it will be accounted against $the_billed_uid and matching stats will show up under the uid with the given $accounting_tag. Because the number of allocations for the stats structs is not that big: ~500 apps * 32 per app we'll just do it atomic. This avoids walking lists many times, and the fancy worker thread handling. Slabs will grow when needed later. It use netdevice and inetaddr notifications instead of hooks in the core dev code to track when a device comes and goes. This removes the need for exposed iface_stat.h. Put procfs dirs in /proc/net/xt_qtaguid/ ctrl stats iface_stat//... The uid stats are obtainable in ./stats. Change-Id: I01af4fd91c8de651668d3decb76d9bdc1e343919 Signed-off-by: JP Abgrall --- include/linux/android_aid.h | 2 + include/linux/netfilter/xt_qtaguid.h | 13 + net/netfilter/Kconfig | 18 + net/netfilter/Makefile | 1 + net/netfilter/xt_qtaguid.c | 2785 ++++++++++++++++++++++++++ net/netfilter/xt_qtaguid_internal.h | 330 +++ net/netfilter/xt_qtaguid_print.c | 556 +++++ net/netfilter/xt_qtaguid_print.h | 120 ++ 8 files changed, 3825 insertions(+) create mode 100644 include/linux/netfilter/xt_qtaguid.h create mode 100644 net/netfilter/xt_qtaguid.c create mode 100644 net/netfilter/xt_qtaguid_internal.h create mode 100644 net/netfilter/xt_qtaguid_print.c create mode 100644 net/netfilter/xt_qtaguid_print.h diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h index 3d7a5ead1200..6f1fa1792dfc 100644 --- a/include/linux/android_aid.h +++ b/include/linux/android_aid.h @@ -22,5 +22,7 @@ #define AID_INET KGIDT_INIT(3003) #define AID_NET_RAW KGIDT_INIT(3004) #define AID_NET_ADMIN KGIDT_INIT(3005) +#define AID_NET_BW_STATS KGIDT_INIT(3006) /* read bandwidth statistics */ +#define AID_NET_BW_ACCT KGIDT_INIT(3007) /* change bandwidth statistics accounting */ #endif diff --git a/include/linux/netfilter/xt_qtaguid.h b/include/linux/netfilter/xt_qtaguid.h new file mode 100644 index 000000000000..ca60fbdec2f3 --- /dev/null +++ b/include/linux/netfilter/xt_qtaguid.h @@ -0,0 +1,13 @@ +#ifndef _XT_QTAGUID_MATCH_H +#define _XT_QTAGUID_MATCH_H + +/* For now we just replace the xt_owner. + * FIXME: make iptables aware of qtaguid. */ +#include + +#define XT_QTAGUID_UID XT_OWNER_UID +#define XT_QTAGUID_GID XT_OWNER_GID +#define XT_QTAGUID_SOCKET XT_OWNER_SOCKET +#define xt_qtaguid_match_info xt_owner_match_info + +#endif /* _XT_QTAGUID_MATCH_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e8d56d9a4df2..8945065a89b8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1316,6 +1316,8 @@ config NETFILTER_XT_MATCH_OWNER based on who created the socket: the user or group. It is also possible to check whether a socket actually exists. + Conflicts with '"quota, tag, uid" match' + config NETFILTER_XT_MATCH_POLICY tristate 'IPsec "policy" match support' depends on XFRM @@ -1349,6 +1351,22 @@ config NETFILTER_XT_MATCH_PKTTYPE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_QTAGUID + bool '"quota, tag, owner" match and stats support' + depends on NETFILTER_XT_MATCH_SOCKET + depends on NETFILTER_XT_MATCH_OWNER=n + help + This option replaces the `owner' match. In addition to matching + on uid, it keeps stats based on a tag assigned to a socket. + The full tag is comprised of a UID and an accounting tag. + The tags are assignable to sockets from user space (e.g. a download + manager can assign the socket to another UID for accounting). + Stats and control are done via /proc/net/xt_qtaguid/. + It replaces owner as it takes the same arguments, but should + really be recognized by the iptables tool. + + If unsure, say `N'. + config NETFILTER_XT_MATCH_QUOTA tristate '"quota" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c23c3c84416f..59ceaa8ec204 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -167,6 +167,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CGROUP) += xt_cgroup.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c new file mode 100644 index 000000000000..b0a221806878 --- /dev/null +++ b/net/netfilter/xt_qtaguid.c @@ -0,0 +1,2785 @@ +/* + * Kernel iptables module to track stats for packets based on user tags. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * There are run-time debug flags enabled via the debug_mask module param, or + * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h. + */ +#define DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "xt_qtaguid_internal.h" +#include "xt_qtaguid_print.h" + +/* + * We only use the xt_socket funcs within a similar context to avoid unexpected + * return values. + */ +#define XT_SOCKET_SUPPORTED_HOOKS \ + ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) + + +static const char *module_procdirname = "xt_qtaguid"; +static struct proc_dir_entry *xt_qtaguid_procdir; + +static unsigned int proc_iface_perms = S_IRUGO; +module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR); + +static struct proc_dir_entry *xt_qtaguid_stats_file; +static unsigned int proc_stats_perms = S_IRUGO; +module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); + +static struct proc_dir_entry *xt_qtaguid_ctrl_file; +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; +#else +static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; +#endif +module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); + +#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#include +static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; +static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; +#else +/* 0 means, don't limit anybody */ +static gid_t proc_stats_readall_gid; +static gid_t proc_ctrl_write_gid; +#endif +module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, + S_IRUGO | S_IWUSR); +module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, + S_IRUGO | S_IWUSR); + +/* + * Limit the number of active tags (via socket tags) for a given UID. + * Multiple processes could share the UID. + */ +static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS; +module_param(max_sock_tags, int, S_IRUGO | S_IWUSR); + +/* + * After the kernel has initiallized this module, it is still possible + * to make it passive. + * Setting passive to Y: + * - the iface stats handling will not act on notifications. + * - iptables matches will never match. + * - ctrl commands silently succeed. + * - stats are always empty. + * This is mostly usefull when a bug is suspected. + */ +static bool module_passive; +module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR); + +/* + * Control how qtaguid data is tracked per proc/uid. + * Setting tag_tracking_passive to Y: + * - don't create proc specific structs to track tags + * - don't check that active tag stats exceed some limits. + * - don't clean up socket tags on process exits. + * This is mostly usefull when a bug is suspected. + */ +static bool qtu_proc_handling_passive; +module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool, + S_IRUGO | S_IWUSR); + +#define QTU_DEV_NAME "xt_qtaguid" + +uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK; +module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); + +/*---------------------------------------------------------------------------*/ +static const char *iface_stat_procdirname = "iface_stat"; +static struct proc_dir_entry *iface_stat_procdir; +static const char *iface_stat_all_procfilename = "iface_stat_all"; +static struct proc_dir_entry *iface_stat_all_procfile; + +/* + * Ordering of locks: + * outer locks: + * iface_stat_list_lock + * sock_tag_list_lock + * inner locks: + * uid_tag_data_tree_lock + * tag_counter_set_list_lock + * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock + * is acquired. + * + * Call tree with all lock holders as of 2011-09-25: + * + * iface_stat_all_proc_read() + * iface_stat_list_lock + * (struct iface_stat) + * + * qtaguid_ctrl_proc_read() + * sock_tag_list_lock + * (sock_tag_tree) + * (struct proc_qtu_data->sock_tag_list) + * prdebug_full_state() + * sock_tag_list_lock + * (sock_tag_tree) + * uid_tag_data_tree_lock + * (uid_tag_data_tree) + * (proc_qtu_data_tree) + * iface_stat_list_lock + * + * qtaguid_stats_proc_read() + * iface_stat_list_lock + * struct iface_stat->tag_stat_list_lock + * + * qtudev_open() + * uid_tag_data_tree_lock + * + * qtudev_release() + * sock_tag_data_list_lock + * uid_tag_data_tree_lock + * prdebug_full_state() + * sock_tag_list_lock + * uid_tag_data_tree_lock + * iface_stat_list_lock + * + * iface_netdev_event_handler() + * iface_stat_create() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * iface_inetaddr_event_handler() + * iface_stat_create() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * iface_inet6addr_event_handler() + * iface_stat_create_ipv6() + * iface_stat_list_lock + * iface_stat_update() + * iface_stat_list_lock + * + * qtaguid_mt() + * account_for_uid() + * if_tag_stat_update() + * get_sock_stat() + * sock_tag_list_lock + * struct iface_stat->tag_stat_list_lock + * tag_stat_update() + * get_active_counter_set() + * tag_counter_set_list_lock + * tag_stat_update() + * get_active_counter_set() + * tag_counter_set_list_lock + * + * + * qtaguid_ctrl_parse() + * ctrl_cmd_delete() + * sock_tag_list_lock + * tag_counter_set_list_lock + * iface_stat_list_lock + * struct iface_stat->tag_stat_list_lock + * uid_tag_data_tree_lock + * ctrl_cmd_counter_set() + * tag_counter_set_list_lock + * ctrl_cmd_tag() + * sock_tag_list_lock + * (sock_tag_tree) + * get_tag_ref() + * uid_tag_data_tree_lock + * (uid_tag_data_tree) + * uid_tag_data_tree_lock + * (proc_qtu_data_tree) + * ctrl_cmd_untag() + * sock_tag_list_lock + * uid_tag_data_tree_lock + * + */ +static LIST_HEAD(iface_stat_list); +static DEFINE_SPINLOCK(iface_stat_list_lock); + +static struct rb_root sock_tag_tree = RB_ROOT; +static DEFINE_SPINLOCK(sock_tag_list_lock); + +static struct rb_root tag_counter_set_tree = RB_ROOT; +static DEFINE_SPINLOCK(tag_counter_set_list_lock); + +static struct rb_root uid_tag_data_tree = RB_ROOT; +static DEFINE_SPINLOCK(uid_tag_data_tree_lock); + +static struct rb_root proc_qtu_data_tree = RB_ROOT; +/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ + +static struct qtaguid_event_counts qtu_events; +/*----------------------------------------------*/ +static bool can_manipulate_uids(void) +{ + /* root pwnd */ + return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) + || in_egroup_p(proc_ctrl_write_gid); +} + +static bool can_impersonate_uid(uid_t uid) +{ + return uid == current_fsuid() || can_manipulate_uids(); +} + +static bool can_read_other_uid_stats(uid_t uid) +{ + /* root pwnd */ + return unlikely(!current_fsuid()) || uid == current_fsuid() + || unlikely(!proc_stats_readall_gid) + || in_egroup_p(proc_stats_readall_gid); +} + +static inline void dc_add_byte_packets(struct data_counters *counters, int set, + enum ifs_tx_rx direction, + enum ifs_proto ifs_proto, + int bytes, + int packets) +{ + counters->bpc[set][direction][ifs_proto].bytes += bytes; + counters->bpc[set][direction][ifs_proto].packets += packets; +} + +static inline uint64_t dc_sum_bytes(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].bytes + + counters->bpc[set][direction][IFS_UDP].bytes + + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; +} + +static inline uint64_t dc_sum_packets(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].packets + + counters->bpc[set][direction][IFS_UDP].packets + + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; +} + +static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct tag_node *data = rb_entry(node, struct tag_node, node); + int result; + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " + " node=%p data=%p\n", tag, node, data); + result = tag_compare(tag, data->tag); + RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " + " data.tag=0x%llx (uid=%u) res=%d\n", + tag, data->tag, get_uid_from_tag(data->tag), result); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct tag_node *this = rb_entry(*new, struct tag_node, + node); + int result = tag_compare(data->tag, this->tag); + RB_DEBUG("qtaguid: %s(): tag=0x%llx" + " (uid=%u)\n", __func__, + this->tag, + get_uid_from_tag(this->tag)); + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_stat, tn.node); +} + +static void tag_counter_set_tree_insert(struct tag_counter_set *data, + struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, + tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_counter_set, tn.node); + +} + +static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) +{ + tag_node_tree_insert(&data->tn, root); +} + +static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) +{ + struct tag_node *node = tag_node_tree_search(root, tag); + if (!node) + return NULL; + return rb_entry(&node->node, struct tag_ref, tn.node); +} + +static struct sock_tag *sock_tag_tree_search(struct rb_root *root, + const struct sock *sk) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct sock_tag *data = rb_entry(node, struct sock_tag, + sock_node); + if (sk < data->sk) + node = node->rb_left; + else if (sk > data->sk) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct sock_tag *this = rb_entry(*new, struct sock_tag, + sock_node); + parent = *new; + if (data->sk < this->sk) + new = &((*new)->rb_left); + else if (data->sk > this->sk) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->sock_node, parent, new); + rb_insert_color(&data->sock_node, root); +} + +static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) +{ + struct rb_node *node; + struct sock_tag *st_entry; + + node = rb_first(st_to_free_tree); + while (node) { + st_entry = rb_entry(node, struct sock_tag, sock_node); + node = rb_next(node); + CT_DEBUG("qtaguid: %s(): " + "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, + st_entry->sk, + st_entry->tag, + get_uid_from_tag(st_entry->tag)); + rb_erase(&st_entry->sock_node, st_to_free_tree); + sockfd_put(st_entry->socket); + kfree(st_entry); + } +} + +static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, + const pid_t pid) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct proc_qtu_data *data = rb_entry(node, + struct proc_qtu_data, + node); + if (pid < data->pid) + node = node->rb_left; + else if (pid > data->pid) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, + struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct proc_qtu_data *this = rb_entry(*new, + struct proc_qtu_data, + node); + parent = *new; + if (data->pid < this->pid) + new = &((*new)->rb_left); + else if (data->pid > this->pid) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void uid_tag_data_tree_insert(struct uid_tag_data *data, + struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct uid_tag_data *this = rb_entry(*new, + struct uid_tag_data, + node); + parent = *new; + if (data->uid < this->uid) + new = &((*new)->rb_left); + else if (data->uid > this->uid) + new = &((*new)->rb_right); + else + BUG(); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, + uid_t uid) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct uid_tag_data *data = rb_entry(node, + struct uid_tag_data, + node); + if (uid < data->uid) + node = node->rb_left; + else if (uid > data->uid) + node = node->rb_right; + else + return data; + } + return NULL; +} + +/* + * Allocates a new uid_tag_data struct if needed. + * Returns a pointer to the found or allocated uid_tag_data. + * Returns a PTR_ERR on failures, and lock is not held. + * If found is not NULL: + * sets *found to true if not allocated. + * sets *found to false if allocated. + */ +struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) +{ + struct uid_tag_data *utd_entry; + + /* Look for top level uid_tag_data for the UID */ + utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); + DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); + + if (found_res) + *found_res = utd_entry; + if (utd_entry) + return utd_entry; + + utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); + if (!utd_entry) { + pr_err("qtaguid: get_uid_data(%u): " + "tag data alloc failed\n", uid); + return ERR_PTR(-ENOMEM); + } + + utd_entry->uid = uid; + utd_entry->tag_ref_tree = RB_ROOT; + uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); + DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); + return utd_entry; +} + +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ +static struct tag_ref *new_tag_ref(tag_t new_tag, + struct uid_tag_data *utd_entry) +{ + struct tag_ref *tr_entry; + int res; + + if (utd_entry->num_active_tags + 1 > max_sock_tags) { + pr_info("qtaguid: new_tag_ref(0x%llx): " + "tag ref alloc quota exceeded. max=%d\n", + new_tag, max_sock_tags); + res = -EMFILE; + goto err_res; + + } + + tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); + if (!tr_entry) { + pr_err("qtaguid: new_tag_ref(0x%llx): " + "tag ref alloc failed\n", + new_tag); + res = -ENOMEM; + goto err_res; + } + tr_entry->tn.tag = new_tag; + /* tr_entry->num_sock_tags handled by caller */ + utd_entry->num_active_tags++; + tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); + DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " + " inserted new tag ref %p\n", + new_tag, tr_entry); + return tr_entry; + +err_res: + return ERR_PTR(res); +} + +static struct tag_ref *lookup_tag_ref(tag_t full_tag, + struct uid_tag_data **utd_res) +{ + struct uid_tag_data *utd_entry; + struct tag_ref *tr_entry; + bool found_utd; + uid_t uid = get_uid_from_tag(full_tag); + + DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", + full_tag, uid); + + utd_entry = get_uid_data(uid, &found_utd); + if (IS_ERR_OR_NULL(utd_entry)) { + if (utd_res) + *utd_res = utd_entry; + return NULL; + } + + tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); + if (utd_res) + *utd_res = utd_entry; + DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", + full_tag, utd_entry, tr_entry); + return tr_entry; +} + +/* Never returns NULL. Either PTR_ERR or a valid ptr. */ +static struct tag_ref *get_tag_ref(tag_t full_tag, + struct uid_tag_data **utd_res) +{ + struct uid_tag_data *utd_entry; + struct tag_ref *tr_entry; + + DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", + full_tag); + spin_lock_bh(&uid_tag_data_tree_lock); + tr_entry = lookup_tag_ref(full_tag, &utd_entry); + BUG_ON(IS_ERR_OR_NULL(utd_entry)); + if (!tr_entry) + tr_entry = new_tag_ref(full_tag, utd_entry); + + spin_unlock_bh(&uid_tag_data_tree_lock); + if (utd_res) + *utd_res = utd_entry; + DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", + full_tag, utd_entry, tr_entry); + return tr_entry; +} + +/* Checks and maybe frees the UID Tag Data entry */ +static void put_utd_entry(struct uid_tag_data *utd_entry) +{ + /* Are we done with the UID tag data entry? */ + if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && + !utd_entry->num_pqd) { + DR_DEBUG("qtaguid: %s(): " + "erase utd_entry=%p uid=%u " + "by pid=%u tgid=%u uid=%u\n", __func__, + utd_entry, utd_entry->uid, + current->pid, current->tgid, current_fsuid()); + BUG_ON(utd_entry->num_active_tags); + rb_erase(&utd_entry->node, &uid_tag_data_tree); + kfree(utd_entry); + } else { + DR_DEBUG("qtaguid: %s(): " + "utd_entry=%p still has %d tags %d proc_qtu_data\n", + __func__, utd_entry, utd_entry->num_active_tags, + utd_entry->num_pqd); + BUG_ON(!(utd_entry->num_active_tags || + utd_entry->num_pqd)); + } +} + +/* + * If no sock_tags are using this tag_ref, + * decrements refcount of utd_entry, removes tr_entry + * from utd_entry->tag_ref_tree and frees. + */ +static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, + struct uid_tag_data *utd_entry) +{ + DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, + tr_entry, tr_entry->tn.tag, + get_uid_from_tag(tr_entry->tn.tag)); + if (!tr_entry->num_sock_tags) { + BUG_ON(!utd_entry->num_active_tags); + utd_entry->num_active_tags--; + rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); + DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); + kfree(tr_entry); + } +} + +static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) +{ + struct rb_node *node; + struct tag_ref *tr_entry; + tag_t acct_tag; + + DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, + full_tag, get_uid_from_tag(full_tag)); + acct_tag = get_atag_from_tag(full_tag); + node = rb_first(&utd_entry->tag_ref_tree); + while (node) { + tr_entry = rb_entry(node, struct tag_ref, tn.node); + node = rb_next(node); + if (!acct_tag || tr_entry->tn.tag == full_tag) + free_tag_ref_from_utd_entry(tr_entry, utd_entry); + } +} + +static int read_proc_u64(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + uint64_t value; + char *p = page; + uint64_t *iface_entry = data; + + if (!data) + return 0; + + value = *iface_entry; + p += sprintf(p, "%llu\n", value); + len = (p - page) - off; + *eof = (len <= count) ? 1 : 0; + *start = page + off; + return len; +} + +static int read_proc_bool(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + bool value; + char *p = page; + bool *bool_entry = data; + + if (!data) + return 0; + + value = *bool_entry; + p += sprintf(p, "%u\n", value); + len = (p - page) - off; + *eof = (len <= count) ? 1 : 0; + *start = page + off; + return len; +} + +static int get_active_counter_set(tag_t tag) +{ + int active_set = 0; + struct tag_counter_set *tcs; + + MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" + " (uid=%u)\n", + tag, get_uid_from_tag(tag)); + /* For now we only handle UID tags for active sets */ + tag = get_utag_from_tag(tag); + spin_lock_bh(&tag_counter_set_list_lock); + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (tcs) + active_set = tcs->active_set; + spin_unlock_bh(&tag_counter_set_list_lock); + return active_set; +} + +/* + * Find the entry for tracking the specified interface. + * Caller must hold iface_stat_list_lock + */ +static struct iface_stat *get_iface_entry(const char *ifname) +{ + struct iface_stat *iface_entry; + + /* Find the entry for tracking the specified tag within the interface */ + if (ifname == NULL) { + pr_info("qtaguid: iface_stat: get() NULL device name\n"); + return NULL; + } + + /* Iterate over interfaces */ + list_for_each_entry(iface_entry, &iface_stat_list, list) { + if (!strcmp(ifname, iface_entry->ifname)) + goto done; + } + iface_entry = NULL; +done: + return iface_entry; +} + +static int iface_stat_all_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, + int *eof, void *data) +{ + char *outp = page; + int item_index = 0; + int len; + struct iface_stat *iface_entry; + struct rtnl_link_stats64 dev_stats, *stats; + struct rtnl_link_stats64 no_dev_stats = {0}; + + if (unlikely(module_passive)) { + *eof = 1; + return 0; + } + + CT_DEBUG("qtaguid:proc iface_stat_all " + "page=%p *num_items_returned=%p off=%ld " + "char_count=%d *eof=%d\n", page, *num_items_returned, + items_to_skip, char_count, *eof); + + if (*eof) + return 0; + + /* + * This lock will prevent iface_stat_update() from changing active, + * and in turn prevent an interface from unregistering itself. + */ + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(iface_entry, &iface_stat_list, list) { + if (item_index++ < items_to_skip) + continue; + + if (iface_entry->active) { + stats = dev_get_stats(iface_entry->net_dev, + &dev_stats); + } else { + stats = &no_dev_stats; + } + len = snprintf(outp, char_count, + "%s %d " + "%llu %llu %llu %llu " + "%llu %llu %llu %llu\n", + iface_entry->ifname, + iface_entry->active, + iface_entry->totals[IFS_RX].bytes, + iface_entry->totals[IFS_RX].packets, + iface_entry->totals[IFS_TX].bytes, + iface_entry->totals[IFS_TX].packets, + stats->rx_bytes, stats->rx_packets, + stats->tx_bytes, stats->tx_packets); + if (len >= char_count) { + spin_unlock_bh(&iface_stat_list_lock); + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + spin_unlock_bh(&iface_stat_list_lock); + + *eof = 1; + return outp - page; +} + +static void iface_create_proc_worker(struct work_struct *work) +{ + struct proc_dir_entry *proc_entry; + struct iface_stat_work *isw = container_of(work, struct iface_stat_work, + iface_work); + struct iface_stat *new_iface = isw->iface_entry; + + /* iface_entries are not deleted, so safe to manipulate. */ + proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); + if (IS_ERR_OR_NULL(proc_entry)) { + pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); + kfree(isw); + return; + } + + new_iface->proc_ptr = proc_entry; + + create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_TX].bytes); + create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_RX].bytes); + create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_TX].packets); + create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, + read_proc_u64, &new_iface->totals[IFS_RX].packets); + create_proc_read_entry("active", proc_iface_perms, proc_entry, + read_proc_bool, &new_iface->active); + + IF_DEBUG("qtaguid: iface_stat: create_proc(): done " + "entry=%p dev=%s\n", new_iface, new_iface->ifname); + kfree(isw); +} + +/* + * Will set the entry's active state, and + * update the net_dev accordingly also. + */ +static void _iface_stat_set_active(struct iface_stat *entry, + struct net_device *net_dev, + bool activate) +{ + if (activate) { + entry->net_dev = net_dev; + entry->active = true; + IF_DEBUG("qtaguid: %s(%s): " + "enable tracking. rfcnt=%d\n", __func__, + entry->ifname, + __this_cpu_read(*net_dev->pcpu_refcnt)); + } else { + entry->active = false; + entry->net_dev = NULL; + IF_DEBUG("qtaguid: %s(%s): " + "disable tracking. rfcnt=%d\n", __func__, + entry->ifname, + __this_cpu_read(*net_dev->pcpu_refcnt)); + + } +} + +/* Caller must hold iface_stat_list_lock */ +static struct iface_stat *iface_alloc(struct net_device *net_dev) +{ + struct iface_stat *new_iface; + struct iface_stat_work *isw; + + new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); + if (new_iface == NULL) { + pr_err("qtaguid: iface_stat: create(%s): " + "iface_stat alloc failed\n", net_dev->name); + return NULL; + } + new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); + if (new_iface->ifname == NULL) { + pr_err("qtaguid: iface_stat: create(%s): " + "ifname alloc failed\n", net_dev->name); + kfree(new_iface); + return NULL; + } + spin_lock_init(&new_iface->tag_stat_list_lock); + new_iface->tag_stat_tree = RB_ROOT; + _iface_stat_set_active(new_iface, net_dev, true); + + /* + * ipv6 notifier chains are atomic :( + * No create_proc_read_entry() for you! + */ + isw = kmalloc(sizeof(*isw), GFP_ATOMIC); + if (!isw) { + pr_err("qtaguid: iface_stat: create(%s): " + "work alloc failed\n", new_iface->ifname); + _iface_stat_set_active(new_iface, net_dev, false); + kfree(new_iface->ifname); + kfree(new_iface); + return NULL; + } + isw->iface_entry = new_iface; + INIT_WORK(&isw->iface_work, iface_create_proc_worker); + schedule_work(&isw->iface_work); + list_add(&new_iface->list, &iface_stat_list); + return new_iface; +} + +static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, + struct iface_stat *iface) +{ + struct rtnl_link_stats64 dev_stats, *stats; + bool stats_rewound; + + stats = dev_get_stats(net_dev, &dev_stats); + /* No empty packets */ + stats_rewound = + (stats->rx_bytes < iface->last_known[IFS_RX].bytes) + || (stats->tx_bytes < iface->last_known[IFS_TX].bytes); + + IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " + "bytes rx/tx=%llu/%llu " + "active=%d last_known=%d " + "stats_rewound=%d\n", __func__, + net_dev ? net_dev->name : "?", + iface, net_dev, + stats->rx_bytes, stats->tx_bytes, + iface->active, iface->last_known_valid, stats_rewound); + + if (iface->active && iface->last_known_valid && stats_rewound) { + pr_warn_once("qtaguid: iface_stat: %s(%s): " + "iface reset its stats unexpectedly\n", __func__, + net_dev->name); + + iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; + iface->totals[IFS_TX].packets += + iface->last_known[IFS_TX].packets; + iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; + iface->totals[IFS_RX].packets += + iface->last_known[IFS_RX].packets; + iface->last_known_valid = false; + IF_DEBUG("qtaguid: %s(%s): iface=%p " + "used last known bytes rx/tx=%llu/%llu\n", __func__, + iface->ifname, iface, iface->last_known[IFS_RX].bytes, + iface->last_known[IFS_TX].bytes); + } +} + +/* + * Create a new entry for tracking the specified interface. + * Do nothing if the entry already exists. + * Called when an interface is configured with a valid IP address. + */ +static void iface_stat_create(struct net_device *net_dev, + struct in_ifaddr *ifa) +{ + struct in_device *in_dev = NULL; + const char *ifname; + struct iface_stat *entry; + __be32 ipaddr = 0; + struct iface_stat *new_iface; + + IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", + net_dev ? net_dev->name : "?", + ifa, net_dev); + if (!net_dev) { + pr_err("qtaguid: iface_stat: create(): no net dev\n"); + return; + } + + ifname = net_dev->name; + if (!ifa) { + in_dev = in_dev_get(net_dev); + if (!in_dev) { + pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", + ifname); + return; + } + IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", + ifname, in_dev); + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + IF_DEBUG("qtaguid: iface_stat: create(%s): " + "ifa=%p ifa_label=%s\n", + ifname, ifa, + ifa->ifa_label ? ifa->ifa_label : "(null)"); + if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) + break; + } + } + + if (!ifa) { + IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", + ifname); + goto done_put; + } + ipaddr = ifa->ifa_local; + + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(ifname); + if (entry != NULL) { + bool activate = !ipv4_is_loopback(ipaddr); + IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", + ifname, entry); + iface_check_stats_reset_and_adjust(net_dev, entry); + _iface_stat_set_active(entry, net_dev, activate); + IF_DEBUG("qtaguid: %s(%s): " + "tracking now %d on ip=%pI4\n", __func__, + entry->ifname, activate, &ipaddr); + goto done_unlock_put; + } else if (ipv4_is_loopback(ipaddr)) { + IF_DEBUG("qtaguid: iface_stat: create(%s): " + "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); + goto done_unlock_put; + } + + new_iface = iface_alloc(net_dev); + IF_DEBUG("qtaguid: iface_stat: create(%s): done " + "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); +done_unlock_put: + spin_unlock_bh(&iface_stat_list_lock); +done_put: + if (in_dev) + in_dev_put(in_dev); +} + +static void iface_stat_create_ipv6(struct net_device *net_dev, + struct inet6_ifaddr *ifa) +{ + struct in_device *in_dev; + const char *ifname; + struct iface_stat *entry; + struct iface_stat *new_iface; + int addr_type; + + IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", + ifa, net_dev, net_dev ? net_dev->name : ""); + if (!net_dev) { + pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); + return; + } + ifname = net_dev->name; + + in_dev = in_dev_get(net_dev); + if (!in_dev) { + pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", + ifname); + return; + } + + IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", + ifname, in_dev); + + if (!ifa) { + IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", + ifname); + goto done_put; + } + addr_type = ipv6_addr_type(&ifa->addr); + + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(ifname); + if (entry != NULL) { + bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + ifname, entry); + iface_check_stats_reset_and_adjust(net_dev, entry); + _iface_stat_set_active(entry, net_dev, activate); + IF_DEBUG("qtaguid: %s(%s): " + "tracking now %d on ip=%pI6c\n", __func__, + entry->ifname, activate, &ifa->addr); + goto done_unlock_put; + } else if (addr_type & IPV6_ADDR_LOOPBACK) { + IF_DEBUG("qtaguid: %s(%s): " + "ignore loopback dev. ip=%pI6c\n", __func__, + ifname, &ifa->addr); + goto done_unlock_put; + } + + new_iface = iface_alloc(net_dev); + IF_DEBUG("qtaguid: iface_stat: create6(%s): done " + "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); + +done_unlock_put: + spin_unlock_bh(&iface_stat_list_lock); +done_put: + in_dev_put(in_dev); +} + +static struct sock_tag *get_sock_stat_nl(const struct sock *sk) +{ + MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); + return sock_tag_tree_search(&sock_tag_tree, sk); +} + +static struct sock_tag *get_sock_stat(const struct sock *sk) +{ + struct sock_tag *sock_tag_entry; + MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); + if (!sk) + return NULL; + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(sk); + spin_unlock_bh(&sock_tag_list_lock); + return sock_tag_entry; +} + +static void +data_counters_update(struct data_counters *dc, int set, + enum ifs_tx_rx direction, int proto, int bytes) +{ + switch (proto) { + case IPPROTO_TCP: + dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); + break; + case IPPROTO_UDP: + dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); + break; + case IPPROTO_IP: + default: + dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, + 1); + break; + } +} + +/* + * Update stats for the specified interface. Do nothing if the entry + * does not exist (when a device was never configured with an IP address). + * Called when an device is being unregistered. + */ +static void iface_stat_update(struct net_device *net_dev, bool stash_only) +{ + struct rtnl_link_stats64 dev_stats, *stats; + struct iface_stat *entry; + + stats = dev_get_stats(net_dev, &dev_stats); + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(net_dev->name); + if (entry == NULL) { + IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", + net_dev->name); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + net_dev->name, entry); + if (!entry->active) { + IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, + net_dev->name); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + + if (stash_only) { + entry->last_known[IFS_TX].bytes = stats->tx_bytes; + entry->last_known[IFS_TX].packets = stats->tx_packets; + entry->last_known[IFS_RX].bytes = stats->rx_bytes; + entry->last_known[IFS_RX].packets = stats->rx_packets; + entry->last_known_valid = true; + IF_DEBUG("qtaguid: %s(%s): " + "dev stats stashed rx/tx=%llu/%llu\n", __func__, + net_dev->name, stats->rx_bytes, stats->tx_bytes); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + entry->totals[IFS_TX].bytes += stats->tx_bytes; + entry->totals[IFS_TX].packets += stats->tx_packets; + entry->totals[IFS_RX].bytes += stats->rx_bytes; + entry->totals[IFS_RX].packets += stats->rx_packets; + /* We don't need the last_known[] anymore */ + entry->last_known_valid = false; + _iface_stat_set_active(entry, net_dev, false); + IF_DEBUG("qtaguid: %s(%s): " + "disable tracking. rx/tx=%llu/%llu\n", __func__, + net_dev->name, stats->rx_bytes, stats->tx_bytes); + spin_unlock_bh(&iface_stat_list_lock); +} + +static void tag_stat_update(struct tag_stat *tag_entry, + enum ifs_tx_rx direction, int proto, int bytes) +{ + int active_set; + active_set = get_active_counter_set(tag_entry->tn.tag); + MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " + "dir=%d proto=%d bytes=%d)\n", + tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), + active_set, direction, proto, bytes); + data_counters_update(&tag_entry->counters, active_set, direction, + proto, bytes); + if (tag_entry->parent_counters) + data_counters_update(tag_entry->parent_counters, active_set, + direction, proto, bytes); +} + +/* + * Create a new entry for tracking the specified {acct_tag,uid_tag} within + * the interface. + * iface_entry->tag_stat_list_lock should be held. + */ +static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, + tag_t tag) +{ + struct tag_stat *new_tag_stat_entry = NULL; + IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" + " (uid=%u)\n", __func__, + iface_entry, tag, get_uid_from_tag(tag)); + new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); + if (!new_tag_stat_entry) { + pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); + goto done; + } + new_tag_stat_entry->tn.tag = tag; + tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); +done: + return new_tag_stat_entry; +} + +static void if_tag_stat_update(const char *ifname, uid_t uid, + const struct sock *sk, enum ifs_tx_rx direction, + int proto, int bytes) +{ + struct tag_stat *tag_stat_entry; + tag_t tag, acct_tag; + tag_t uid_tag; + struct data_counters *uid_tag_counters; + struct sock_tag *sock_tag_entry; + struct iface_stat *iface_entry; + struct tag_stat *new_tag_stat; + MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " + "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", + ifname, uid, sk, direction, proto, bytes); + + + iface_entry = get_iface_entry(ifname); + if (!iface_entry) { + pr_err("qtaguid: iface_stat: stat_update() %s not found\n", + ifname); + return; + } + /* It is ok to process data when an iface_entry is inactive */ + + MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", + ifname, iface_entry); + + /* + * Look for a tagged sock. + * It will have an acct_uid. + */ + sock_tag_entry = get_sock_stat(sk); + if (sock_tag_entry) { + tag = sock_tag_entry->tag; + acct_tag = get_atag_from_tag(tag); + uid_tag = get_utag_from_tag(tag); + } else { + acct_tag = make_atag_from_value(0); + tag = combine_atag_with_uid(acct_tag, uid); + uid_tag = make_tag_from_uid(uid); + } + MT_DEBUG("qtaguid: iface_stat: stat_update(): " + " looking for tag=0x%llx (uid=%u) in ife=%p\n", + tag, get_uid_from_tag(tag), iface_entry); + /* Loop over tag list under this interface for {acct_tag,uid_tag} */ + spin_lock_bh(&iface_entry->tag_stat_list_lock); + + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, + tag); + if (tag_stat_entry) { + /* + * Updating the {acct_tag, uid_tag} entry handles both stats: + * {0, uid_tag} will also get updated. + */ + tag_stat_update(tag_stat_entry, direction, proto, bytes); + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + return; + } + + /* Loop over tag list under this interface for {0,uid_tag} */ + tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, + uid_tag); + if (!tag_stat_entry) { + /* Here: the base uid_tag did not exist */ + /* + * No parent counters. So + * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats. + */ + new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); + uid_tag_counters = &new_tag_stat->counters; + } else { + uid_tag_counters = &tag_stat_entry->counters; + } + + if (acct_tag) { + new_tag_stat = create_if_tag_stat(iface_entry, tag); + new_tag_stat->parent_counters = uid_tag_counters; + } + tag_stat_update(new_tag_stat, direction, proto, bytes); + spin_unlock_bh(&iface_entry->tag_stat_list_lock); +} + +static int iface_netdev_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: netdev_event(): " + "ev=0x%lx/%s netdev=%p->name=%s\n", + event, netdev_evt_str(event), dev, dev ? dev->name : ""); + + switch (event) { + case NETDEV_UP: + iface_stat_create(dev, NULL); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static int iface_inet6addr_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + struct net_device *dev; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " + "ev=0x%lx/%s ifa=%p\n", + event, netdev_evt_str(event), ifa); + + switch (event) { + case NETDEV_UP: + BUG_ON(!ifa || !ifa->idev); + dev = (struct net_device *)ifa->idev->dev; + iface_stat_create_ipv6(dev, ifa); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + BUG_ON(!ifa || !ifa->idev); + dev = (struct net_device *)ifa->idev->dev; + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static int iface_inetaddr_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct net_device *dev; + + if (unlikely(module_passive)) + return NOTIFY_DONE; + + IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " + "ev=0x%lx/%s ifa=%p\n", + event, netdev_evt_str(event), ifa); + + switch (event) { + case NETDEV_UP: + BUG_ON(!ifa || !ifa->ifa_dev); + dev = ifa->ifa_dev->dev; + iface_stat_create(dev, ifa); + atomic64_inc(&qtu_events.iface_events); + break; + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + BUG_ON(!ifa || !ifa->ifa_dev); + dev = ifa->ifa_dev->dev; + iface_stat_update(dev, event == NETDEV_DOWN); + atomic64_inc(&qtu_events.iface_events); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block iface_netdev_notifier_blk = { + .notifier_call = iface_netdev_event_handler, +}; + +static struct notifier_block iface_inetaddr_notifier_blk = { + .notifier_call = iface_inetaddr_event_handler, +}; + +static struct notifier_block iface_inet6addr_notifier_blk = { + .notifier_call = iface_inet6addr_event_handler, +}; + +static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) +{ + int err; + + iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); + if (!iface_stat_procdir) { + pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); + err = -1; + goto err; + } + + iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, + proc_iface_perms, + parent_procdir); + if (!iface_stat_all_procfile) { + pr_err("qtaguid: iface_stat: init " + " failed to create stat_all proc entry\n"); + err = -1; + goto err_zap_entry; + } + iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; + + + err = register_netdevice_notifier(&iface_netdev_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register dev event handler\n"); + goto err_zap_all_stats_entry; + } + err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register ipv4 dev event handler\n"); + goto err_unreg_nd; + } + + err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); + if (err) { + pr_err("qtaguid: iface_stat: init " + "failed to register ipv6 dev event handler\n"); + goto err_unreg_ip4_addr; + } + return 0; + +err_unreg_ip4_addr: + unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); +err_unreg_nd: + unregister_netdevice_notifier(&iface_netdev_notifier_blk); +err_zap_all_stats_entry: + remove_proc_entry(iface_stat_all_procfilename, parent_procdir); +err_zap_entry: + remove_proc_entry(iface_stat_procdirname, parent_procdir); +err: + return err; +} + +static struct sock *qtaguid_find_sk(const struct sk_buff *skb, + struct xt_action_param *par) +{ + struct sock *sk; + unsigned int hook_mask = (1 << par->hooknum); + + MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, + par->hooknum, par->family); + + /* + * Let's not abuse the the xt_socket_get*_sk(), or else it will + * return garbage SKs. + */ + if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) + return NULL; + + switch (par->family) { + case NFPROTO_IPV6: + sk = xt_socket_get6_sk(skb, par); + break; + case NFPROTO_IPV4: + sk = xt_socket_get4_sk(skb, par); + break; + default: + return NULL; + } + + /* + * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs. + * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 + * Not fixed in 3.0-r3 :( + */ + if (sk) { + MT_DEBUG("qtaguid: %p->sk_proto=%u " + "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); + if (sk->sk_state == TCP_TIME_WAIT) { + xt_socket_put_sk(sk); + sk = NULL; + } + } + return sk; +} + +static void account_for_uid(const struct sk_buff *skb, + const struct sock *alternate_sk, uid_t uid, + struct xt_action_param *par) +{ + const struct net_device *el_dev; + + if (!skb->dev) { + MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); + el_dev = par->in ? : par->out; + } else { + const struct net_device *other_dev; + el_dev = skb->dev; + other_dev = par->in ? : par->out; + if (el_dev != other_dev) { + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " + "par->(in/out)=%p %s\n", + par->hooknum, el_dev, el_dev->name, other_dev, + other_dev->name); + } + } + + if (unlikely(!el_dev)) { + pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); + } else if (unlikely(!el_dev->name)) { + pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); + } else { + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", + par->hooknum, + el_dev->name, + el_dev->type); + + if_tag_stat_update(el_dev->name, uid, + skb->sk ? skb->sk : alternate_sk, + par->in ? IFS_RX : IFS_TX, + ip_hdr(skb)->protocol, skb->len); + } +} + +static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_qtaguid_match_info *info = par->matchinfo; + const struct file *filp; + bool got_sock = false; + struct sock *sk; + uid_t sock_uid; + bool res; + + if (unlikely(module_passive)) + return (info->match ^ info->invert) == 0; + + MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", + par->hooknum, skb, par->in, par->out, par->family); + + atomic64_inc(&qtu_events.match_calls); + if (skb == NULL) { + res = (info->match ^ info->invert) == 0; + goto ret_res; + } + + sk = skb->sk; + + if (sk == NULL) { + /* + * A missing sk->sk_socket happens when packets are in-flight + * and the matching socket is already closed and gone. + */ + sk = qtaguid_find_sk(skb, par); + /* + * If we got the socket from the find_sk(), we will need to put + * it back, as nf_tproxy_get_sock_v4() got it. + */ + got_sock = sk; + if (sk) + atomic64_inc(&qtu_events.match_found_sk_in_ct); + else + atomic64_inc(&qtu_events.match_found_no_sk_in_ct); + } else { + atomic64_inc(&qtu_events.match_found_sk); + } + MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", + par->hooknum, sk, got_sock, ip_hdr(skb)->protocol); + if (sk != NULL) { + MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", + par->hooknum, sk, sk->sk_socket, + sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", + par->hooknum, filp ? filp->f_cred->fsuid : -1); + } + + if (sk == NULL || sk->sk_socket == NULL) { + /* + * Here, the qtaguid_find_sk() using connection tracking + * couldn't find the owner, so for now we just count them + * against the system. + */ + /* + * TODO: unhack how to force just accounting. + * For now we only do iface stats when the uid-owner is not + * requested. + */ + if (!(info->match & XT_QTAGUID_UID)) + account_for_uid(skb, sk, 0, par); + MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", + par->hooknum, + sk ? sk->sk_socket : NULL); + res = (info->match ^ info->invert) == 0; + atomic64_inc(&qtu_events.match_no_sk); + goto put_sock_ret_res; + } else if (info->match & info->invert & XT_QTAGUID_SOCKET) { + res = false; + goto put_sock_ret_res; + } + filp = sk->sk_socket->file; + if (filp == NULL) { + MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); + account_for_uid(skb, sk, 0, par); + res = ((info->match ^ info->invert) & + (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; + atomic64_inc(&qtu_events.match_no_sk_file); + goto put_sock_ret_res; + } + sock_uid = filp->f_cred->fsuid; + /* + * TODO: unhack how to force just accounting. + * For now we only do iface stats when the uid-owner is not requested + */ + if (!(info->match & XT_QTAGUID_UID)) + account_for_uid(skb, sk, sock_uid, par); + + /* + * The following two tests fail the match when: + * id not in range AND no inverted condition requested + * or id in range AND inverted condition requested + * Thus (!a && b) || (a && !b) == a ^ b + */ + if (info->match & XT_QTAGUID_UID) + if ((filp->f_cred->fsuid >= info->uid_min && + filp->f_cred->fsuid <= info->uid_max) ^ + !(info->invert & XT_QTAGUID_UID)) { + MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", + par->hooknum); + res = false; + goto put_sock_ret_res; + } + if (info->match & XT_QTAGUID_GID) + if ((filp->f_cred->fsgid >= info->gid_min && + filp->f_cred->fsgid <= info->gid_max) ^ + !(info->invert & XT_QTAGUID_GID)) { + MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", + par->hooknum); + res = false; + goto put_sock_ret_res; + } + + MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); + res = true; + +put_sock_ret_res: + if (got_sock) + xt_socket_put_sk(sk); +ret_res: + MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); + return res; +} + +#ifdef DDEBUG +/* This function is not in xt_qtaguid_print.c because of locks visibility */ +static void prdebug_full_state(int indent_level, const char *fmt, ...) +{ + va_list args; + char *fmt_buff; + char *buff; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + fmt_buff = kasprintf(GFP_ATOMIC, + "qtaguid: %s(): %s {\n", __func__, fmt); + BUG_ON(!fmt_buff); + va_start(args, fmt); + buff = kvasprintf(GFP_ATOMIC, + fmt_buff, args); + BUG_ON(!buff); + pr_debug("%s", buff); + kfree(fmt_buff); + kfree(buff); + va_end(args); + + spin_lock_bh(&sock_tag_list_lock); + prdebug_sock_tag_tree(indent_level, &sock_tag_tree); + spin_unlock_bh(&sock_tag_list_lock); + + spin_lock_bh(&sock_tag_list_lock); + spin_lock_bh(&uid_tag_data_tree_lock); + prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); + prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); + spin_unlock_bh(&uid_tag_data_tree_lock); + spin_unlock_bh(&sock_tag_list_lock); + + spin_lock_bh(&iface_stat_list_lock); + prdebug_iface_stat_list(indent_level, &iface_stat_list); + spin_unlock_bh(&iface_stat_list_lock); + + pr_debug("qtaguid: %s(): }\n", __func__); +} +#else +static void prdebug_full_state(int indent_level, const char *fmt, ...) {} +#endif + +/* + * Procfs reader to get all active socket tags using style "1)" as described in + * fs/proc/generic.c + */ +static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, int *eof, + void *data) +{ + char *outp = page; + int len; + uid_t uid; + struct rb_node *node; + struct sock_tag *sock_tag_entry; + int item_index = 0; + int indent_level = 0; + long f_count; + + if (unlikely(module_passive)) { + *eof = 1; + return 0; + } + + if (*eof) + return 0; + + CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", + page, items_to_skip, char_count, *eof); + + spin_lock_bh(&sock_tag_list_lock); + for (node = rb_first(&sock_tag_tree); + node; + node = rb_next(node)) { + if (item_index++ < items_to_skip) + continue; + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + uid = get_uid_from_tag(sock_tag_entry->tag); + CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " + "pid=%u\n", + sock_tag_entry->sk, + sock_tag_entry->tag, + uid, + sock_tag_entry->pid + ); + f_count = atomic_long_read( + &sock_tag_entry->socket->file->f_count); + len = snprintf(outp, char_count, + "sock=%p tag=0x%llx (uid=%u) pid=%u " + "f_count=%lu\n", + sock_tag_entry->sk, + sock_tag_entry->tag, uid, + sock_tag_entry->pid, f_count); + if (len >= char_count) { + spin_unlock_bh(&sock_tag_list_lock); + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + spin_unlock_bh(&sock_tag_list_lock); + + if (item_index++ >= items_to_skip) { + len = snprintf(outp, char_count, + "events: sockets_tagged=%llu " + "sockets_untagged=%llu " + "counter_set_changes=%llu " + "delete_cmds=%llu " + "iface_events=%llu " + "match_calls=%llu " + "match_found_sk=%llu " + "match_found_sk_in_ct=%llu " + "match_found_no_sk_in_ct=%llu " + "match_no_sk=%llu " + "match_no_sk_file=%llu\n", + atomic64_read(&qtu_events.sockets_tagged), + atomic64_read(&qtu_events.sockets_untagged), + atomic64_read(&qtu_events.counter_set_changes), + atomic64_read(&qtu_events.delete_cmds), + atomic64_read(&qtu_events.iface_events), + atomic64_read(&qtu_events.match_calls), + atomic64_read(&qtu_events.match_found_sk), + atomic64_read(&qtu_events.match_found_sk_in_ct), + atomic64_read( + &qtu_events.match_found_no_sk_in_ct), + atomic64_read(&qtu_events.match_no_sk), + atomic64_read(&qtu_events.match_no_sk_file)); + if (len >= char_count) { + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + + /* Count the following as part of the last item_index */ + if (item_index > items_to_skip) { + prdebug_full_state(indent_level, "proc ctrl"); + } + + *eof = 1; + return outp - page; +} + +/* + * Delete socket tags, and stat tags associated with a given + * accouting tag and uid. + */ +static int ctrl_cmd_delete(const char *input) +{ + char cmd; + uid_t uid; + uid_t entry_uid; + tag_t acct_tag; + tag_t tag; + int res, argc; + struct iface_stat *iface_entry; + struct rb_node *node; + struct sock_tag *st_entry; + struct rb_root st_to_free_tree = RB_ROOT; + struct tag_stat *ts_entry; + struct tag_counter_set *tcs_entry; + struct tag_ref *tr_entry; + struct uid_tag_data *utd_entry; + + argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); + CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " + "user_tag=0x%llx uid=%u\n", input, argc, cmd, + acct_tag, uid); + if (argc < 2) { + res = -EINVAL; + goto err; + } + if (!valid_atag(acct_tag)) { + pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); + res = -EINVAL; + goto err; + } + if (argc < 3) { + uid = current_fsuid(); + } else if (!can_impersonate_uid(uid)) { + pr_info("qtaguid: ctrl_delete(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err; + } + + tag = combine_atag_with_uid(acct_tag, uid); + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "looking for tag=0x%llx (uid=%u)\n", + input, tag, uid); + + /* Delete socket tags */ + spin_lock_bh(&sock_tag_list_lock); + node = rb_first(&sock_tag_tree); + while (node) { + st_entry = rb_entry(node, struct sock_tag, sock_node); + entry_uid = get_uid_from_tag(st_entry->tag); + node = rb_next(node); + if (entry_uid != uid) + continue; + + CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", + input, st_entry->tag, entry_uid); + + if (!acct_tag || st_entry->tag == tag) { + rb_erase(&st_entry->sock_node, &sock_tag_tree); + /* Can't sockfd_put() within spinlock, do it later. */ + sock_tag_tree_insert(st_entry, &st_to_free_tree); + tr_entry = lookup_tag_ref(st_entry->tag, NULL); + BUG_ON(tr_entry->num_sock_tags <= 0); + tr_entry->num_sock_tags--; + /* + * TODO: remove if, and start failing. + * This is a hack to work around the fact that in some + * places we have "if (IS_ERR_OR_NULL(pqd_entry))" + * and are trying to work around apps + * that didn't open the /dev/xt_qtaguid. + */ + if (st_entry->list.next && st_entry->list.prev) + list_del(&st_entry->list); + } + } + spin_unlock_bh(&sock_tag_list_lock); + + sock_tag_tree_erase(&st_to_free_tree); + + /* Delete tag counter-sets */ + spin_lock_bh(&tag_counter_set_list_lock); + /* Counter sets are only on the uid tag, not full tag */ + tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (tcs_entry) { + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "erase tcs: tag=0x%llx (uid=%u) set=%d\n", + input, + tcs_entry->tn.tag, + get_uid_from_tag(tcs_entry->tn.tag), + tcs_entry->active_set); + rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); + kfree(tcs_entry); + } + spin_unlock_bh(&tag_counter_set_list_lock); + + /* + * If acct_tag is 0, then all entries belonging to uid are + * erased. + */ + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(iface_entry, &iface_stat_list, list) { + spin_lock_bh(&iface_entry->tag_stat_list_lock); + node = rb_first(&iface_entry->tag_stat_tree); + while (node) { + ts_entry = rb_entry(node, struct tag_stat, tn.node); + entry_uid = get_uid_from_tag(ts_entry->tn.tag); + node = rb_next(node); + + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "ts tag=0x%llx (uid=%u)\n", + input, ts_entry->tn.tag, entry_uid); + + if (entry_uid != uid) + continue; + if (!acct_tag || ts_entry->tn.tag == tag) { + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "erase ts: %s 0x%llx %u\n", + input, iface_entry->ifname, + get_atag_from_tag(ts_entry->tn.tag), + entry_uid); + rb_erase(&ts_entry->tn.node, + &iface_entry->tag_stat_tree); + kfree(ts_entry); + } + } + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + } + spin_unlock_bh(&iface_stat_list_lock); + + /* Cleanup the uid_tag_data */ + spin_lock_bh(&uid_tag_data_tree_lock); + node = rb_first(&uid_tag_data_tree); + while (node) { + utd_entry = rb_entry(node, struct uid_tag_data, node); + entry_uid = utd_entry->uid; + node = rb_next(node); + + CT_DEBUG("qtaguid: ctrl_delete(%s): " + "utd uid=%u\n", + input, entry_uid); + + if (entry_uid != uid) + continue; + /* + * Go over the tag_refs, and those that don't have + * sock_tags using them are freed. + */ + put_tag_ref_tree(tag, utd_entry); + put_utd_entry(utd_entry); + } + spin_unlock_bh(&uid_tag_data_tree_lock); + + atomic64_inc(&qtu_events.delete_cmds); + res = 0; + +err: + return res; +} + +static int ctrl_cmd_counter_set(const char *input) +{ + char cmd; + uid_t uid = 0; + tag_t tag; + int res, argc; + struct tag_counter_set *tcs; + int counter_set; + + argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); + CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " + "set=%d uid=%u\n", input, argc, cmd, + counter_set, uid); + if (argc != 3) { + res = -EINVAL; + goto err; + } + if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { + pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", + input); + res = -EINVAL; + goto err; + } + if (!can_manipulate_uids()) { + pr_info("qtaguid: ctrl_counterset(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err; + } + + tag = make_tag_from_uid(uid); + spin_lock_bh(&tag_counter_set_list_lock); + tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); + if (!tcs) { + tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); + if (!tcs) { + spin_unlock_bh(&tag_counter_set_list_lock); + pr_err("qtaguid: ctrl_counterset(%s): " + "failed to alloc counter set\n", + input); + res = -ENOMEM; + goto err; + } + tcs->tn.tag = tag; + tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); + CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " + "(uid=%u) set=%d\n", + input, tag, get_uid_from_tag(tag), counter_set); + } + tcs->active_set = counter_set; + spin_unlock_bh(&tag_counter_set_list_lock); + atomic64_inc(&qtu_events.counter_set_changes); + res = 0; + +err: + return res; +} + +static int ctrl_cmd_tag(const char *input) +{ + char cmd; + int sock_fd = 0; + uid_t uid = 0; + tag_t acct_tag = make_atag_from_value(0); + tag_t full_tag; + struct socket *el_socket; + int res, argc; + struct sock_tag *sock_tag_entry; + struct tag_ref *tag_ref_entry; + struct uid_tag_data *uid_tag_data_entry; + struct proc_qtu_data *pqd_entry; + + /* Unassigned args will get defaulted later. */ + argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); + CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " + "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, + acct_tag, uid); + if (argc < 2) { + res = -EINVAL; + goto err; + } + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ + if (!el_socket) { + pr_info("qtaguid: ctrl_tag(%s): failed to lookup" + " sock_fd=%d err=%d\n", input, sock_fd, res); + goto err; + } + CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", + input, atomic_long_read(&el_socket->file->f_count), + el_socket->sk); + if (argc < 3) { + acct_tag = make_atag_from_value(0); + } else if (!valid_atag(acct_tag)) { + pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); + res = -EINVAL; + goto err_put; + } + CT_DEBUG("qtaguid: ctrl_tag(%s): " + "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " + "in_group=%d in_egroup=%d\n", + input, current->pid, current->tgid, current_uid(), + current_euid(), current_fsuid(), + in_group_p(proc_ctrl_write_gid), + in_egroup_p(proc_ctrl_write_gid)); + if (argc < 4) { + uid = current_fsuid(); + } else if (!can_impersonate_uid(uid)) { + pr_info("qtaguid: ctrl_tag(%s): " + "insufficient priv from pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + res = -EPERM; + goto err_put; + } + full_tag = combine_atag_with_uid(acct_tag, uid); + + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(el_socket->sk); + tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); + if (IS_ERR(tag_ref_entry)) { + res = PTR_ERR(tag_ref_entry); + spin_unlock_bh(&sock_tag_list_lock); + goto err_put; + } + tag_ref_entry->num_sock_tags++; + if (sock_tag_entry) { + struct tag_ref *prev_tag_ref_entry; + + CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " + "st@%p ...->f_count=%ld\n", + input, el_socket->sk, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count)); + /* + * This is a re-tagging, so release the sock_fd that was + * locked at the time of the 1st tagging. + * There is still the ref from this call's sockfd_lookup() so + * it can be done within the spinlock. + */ + sockfd_put(sock_tag_entry->socket); + prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, + &uid_tag_data_entry); + BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); + BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); + prev_tag_ref_entry->num_sock_tags--; + sock_tag_entry->tag = full_tag; + } else { + CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", + input, el_socket->sk); + sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), + GFP_ATOMIC); + if (!sock_tag_entry) { + pr_err("qtaguid: ctrl_tag(%s): " + "socket tag alloc failed\n", + input); + spin_unlock_bh(&sock_tag_list_lock); + res = -ENOMEM; + goto err_tag_unref_put; + } + sock_tag_entry->sk = el_socket->sk; + sock_tag_entry->socket = el_socket; + sock_tag_entry->pid = current->tgid; + sock_tag_entry->tag = combine_atag_with_uid(acct_tag, + uid); + spin_lock_bh(&uid_tag_data_tree_lock); + pqd_entry = proc_qtu_data_tree_search( + &proc_qtu_data_tree, current->tgid); + /* + * TODO: remove if, and start failing. + * At first, we want to catch user-space code that is not + * opening the /dev/xt_qtaguid. + */ + if (IS_ERR_OR_NULL(pqd_entry)) + pr_warn_once( + "qtaguid: %s(): " + "User space forgot to open /dev/xt_qtaguid? " + "pid=%u tgid=%u uid=%u\n", __func__, + current->pid, current->tgid, + current_fsuid()); + else + list_add(&sock_tag_entry->list, + &pqd_entry->sock_tag_list); + spin_unlock_bh(&uid_tag_data_tree_lock); + + sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); + atomic64_inc(&qtu_events.sockets_tagged); + } + spin_unlock_bh(&sock_tag_list_lock); + /* We keep the ref to the socket (file) until it is untagged */ + CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", + input, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count)); + return 0; + +err_tag_unref_put: + BUG_ON(tag_ref_entry->num_sock_tags <= 0); + tag_ref_entry->num_sock_tags--; + free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); +err_put: + CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", + input, atomic_long_read(&el_socket->file->f_count) - 1); + /* Release the sock_fd that was grabbed by sockfd_lookup(). */ + sockfd_put(el_socket); + return res; + +err: + CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); + return res; +} + +static int ctrl_cmd_untag(const char *input) +{ + char cmd; + int sock_fd = 0; + struct socket *el_socket; + int res, argc; + struct sock_tag *sock_tag_entry; + struct tag_ref *tag_ref_entry; + struct uid_tag_data *utd_entry; + struct proc_qtu_data *pqd_entry; + + argc = sscanf(input, "%c %d", &cmd, &sock_fd); + CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", + input, argc, cmd, sock_fd); + if (argc < 2) { + res = -EINVAL; + goto err; + } + el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ + if (!el_socket) { + pr_info("qtaguid: ctrl_untag(%s): failed to lookup" + " sock_fd=%d err=%d\n", input, sock_fd, res); + goto err; + } + CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", + input, atomic_long_read(&el_socket->file->f_count), + el_socket->sk); + spin_lock_bh(&sock_tag_list_lock); + sock_tag_entry = get_sock_stat_nl(el_socket->sk); + if (!sock_tag_entry) { + spin_unlock_bh(&sock_tag_list_lock); + res = -EINVAL; + goto err_put; + } + /* + * The socket already belongs to the current process + * so it can do whatever it wants to it. + */ + rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); + + tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); + BUG_ON(!tag_ref_entry); + BUG_ON(tag_ref_entry->num_sock_tags <= 0); + spin_lock_bh(&uid_tag_data_tree_lock); + pqd_entry = proc_qtu_data_tree_search( + &proc_qtu_data_tree, current->tgid); + /* + * TODO: remove if, and start failing. + * At first, we want to catch user-space code that is not + * opening the /dev/xt_qtaguid. + */ + if (IS_ERR_OR_NULL(pqd_entry)) + pr_warn_once("qtaguid: %s(): " + "User space forgot to open /dev/xt_qtaguid? " + "pid=%u tgid=%u uid=%u\n", __func__, + current->pid, current->tgid, current_fsuid()); + else + list_del(&sock_tag_entry->list); + spin_unlock_bh(&uid_tag_data_tree_lock); + /* + * We don't free tag_ref from the utd_entry here, + * only during a cmd_delete(). + */ + tag_ref_entry->num_sock_tags--; + spin_unlock_bh(&sock_tag_list_lock); + /* + * Release the sock_fd that was grabbed at tag time, + * and once more for the sockfd_lookup() here. + */ + sockfd_put(sock_tag_entry->socket); + CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", + input, sock_tag_entry, + atomic_long_read(&el_socket->file->f_count) - 1); + sockfd_put(el_socket); + + kfree(sock_tag_entry); + atomic64_inc(&qtu_events.sockets_untagged); + + return 0; + +err_put: + CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n", + input, atomic_long_read(&el_socket->file->f_count) - 1); + /* Release the sock_fd that was grabbed by sockfd_lookup(). */ + sockfd_put(el_socket); + return res; + +err: + CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); + return res; +} + +static int qtaguid_ctrl_parse(const char *input, int count) +{ + char cmd; + int res; + + cmd = input[0]; + /* Collect params for commands */ + switch (cmd) { + case 'd': + res = ctrl_cmd_delete(input); + break; + + case 's': + res = ctrl_cmd_counter_set(input); + break; + + case 't': + res = ctrl_cmd_tag(input); + break; + + case 'u': + res = ctrl_cmd_untag(input); + break; + + default: + res = -EINVAL; + goto err; + } + if (!res) + res = count; +err: + CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); + return res; +} + +#define MAX_QTAGUID_CTRL_INPUT_LEN 255 +static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; + + if (unlikely(module_passive)) + return count; + + if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) + return -EINVAL; + + if (copy_from_user(input_buf, buffer, count)) + return -EFAULT; + + input_buf[count] = '\0'; + return qtaguid_ctrl_parse(input_buf, count); +} + +struct proc_print_info { + char *outp; + char **num_items_returned; + struct iface_stat *iface_entry; + struct tag_stat *ts_entry; + int item_index; + int items_to_skip; + int char_count; +}; + +static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) +{ + int len; + struct data_counters *cnts; + + if (!ppi->item_index) { + if (ppi->item_index++ < ppi->items_to_skip) + return 0; + len = snprintf(ppi->outp, ppi->char_count, + "idx iface acct_tag_hex uid_tag_int cnt_set " + "rx_bytes rx_packets " + "tx_bytes tx_packets " + "rx_tcp_bytes rx_tcp_packets " + "rx_udp_bytes rx_udp_packets " + "rx_other_bytes rx_other_packets " + "tx_tcp_bytes tx_tcp_packets " + "tx_udp_bytes tx_udp_packets " + "tx_other_bytes tx_other_packets\n"); + } else { + tag_t tag = ppi->ts_entry->tn.tag; + uid_t stat_uid = get_uid_from_tag(tag); + + if (!can_read_other_uid_stats(stat_uid)) { + CT_DEBUG("qtaguid: stats line: " + "%s 0x%llx %u: insufficient priv " + "from pid=%u tgid=%u uid=%u\n", + ppi->iface_entry->ifname, + get_atag_from_tag(tag), stat_uid, + current->pid, current->tgid, current_fsuid()); + return 0; + } + if (ppi->item_index++ < ppi->items_to_skip) + return 0; + cnts = &ppi->ts_entry->counters; + len = snprintf( + ppi->outp, ppi->char_count, + "%d %s 0x%llx %u %u " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu\n", + ppi->item_index, + ppi->iface_entry->ifname, + get_atag_from_tag(tag), + stat_uid, + cnt_set, + dc_sum_bytes(cnts, cnt_set, IFS_RX), + dc_sum_packets(cnts, cnt_set, IFS_RX), + dc_sum_bytes(cnts, cnt_set, IFS_TX), + dc_sum_packets(cnts, cnt_set, IFS_TX), + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); + } + return len; +} + +static bool pp_sets(struct proc_print_info *ppi) +{ + int len; + int counter_set; + for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; + counter_set++) { + len = pp_stats_line(ppi, counter_set); + if (len >= ppi->char_count) { + *ppi->outp = '\0'; + return false; + } + if (len) { + ppi->outp += len; + ppi->char_count -= len; + (*ppi->num_items_returned)++; + } + } + return true; +} + +/* + * Procfs reader to get all tag stats using style "1)" as described in + * fs/proc/generic.c + * Groups all protocols tx/rx bytes. + */ +static int qtaguid_stats_proc_read(char *page, char **num_items_returned, + off_t items_to_skip, int char_count, int *eof, + void *data) +{ + struct proc_print_info ppi; + int len; + + ppi.outp = page; + ppi.item_index = 0; + ppi.char_count = char_count; + ppi.num_items_returned = num_items_returned; + ppi.items_to_skip = items_to_skip; + + if (unlikely(module_passive)) { + len = pp_stats_line(&ppi, 0); + /* The header should always be shorter than the buffer. */ + BUG_ON(len >= ppi.char_count); + (*num_items_returned)++; + *eof = 1; + return len; + } + + CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " + "char_count=%d *eof=%d\n", page, *num_items_returned, + items_to_skip, char_count, *eof); + + if (*eof) + return 0; + + /* The idx is there to help debug when things go belly up. */ + len = pp_stats_line(&ppi, 0); + /* Don't advance the outp unless the whole line was printed */ + if (len >= ppi.char_count) { + *ppi.outp = '\0'; + return ppi.outp - page; + } + if (len) { + ppi.outp += len; + ppi.char_count -= len; + (*num_items_returned)++; + } + + spin_lock_bh(&iface_stat_list_lock); + list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { + struct rb_node *node; + spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); + for (node = rb_first(&ppi.iface_entry->tag_stat_tree); + node; + node = rb_next(node)) { + ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); + if (!pp_sets(&ppi)) { + spin_unlock_bh( + &ppi.iface_entry->tag_stat_list_lock); + spin_unlock_bh(&iface_stat_list_lock); + return ppi.outp - page; + } + } + spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); + } + spin_unlock_bh(&iface_stat_list_lock); + + *eof = 1; + return ppi.outp - page; +} + +/*------------------------------------------*/ +static int qtudev_open(struct inode *inode, struct file *file) +{ + struct uid_tag_data *utd_entry; + struct proc_qtu_data *pqd_entry; + struct proc_qtu_data *new_pqd_entry; + int res; + bool utd_entry_found; + + if (unlikely(qtu_proc_handling_passive)) + return 0; + + DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", + current->pid, current->tgid, current_fsuid()); + + spin_lock_bh(&uid_tag_data_tree_lock); + + /* Look for existing uid data, or alloc one. */ + utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); + if (IS_ERR_OR_NULL(utd_entry)) { + res = PTR_ERR(utd_entry); + goto err; + } + + /* Look for existing PID based proc_data */ + pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, + current->tgid); + if (pqd_entry) { + pr_err("qtaguid: qtudev_open(): %u/%u %u " + "%s already opened\n", + current->pid, current->tgid, current_fsuid(), + QTU_DEV_NAME); + res = -EBUSY; + goto err_unlock_free_utd; + } + + new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); + if (!new_pqd_entry) { + pr_err("qtaguid: qtudev_open(): %u/%u %u: " + "proc data alloc failed\n", + current->pid, current->tgid, current_fsuid()); + res = -ENOMEM; + goto err_unlock_free_utd; + } + new_pqd_entry->pid = current->tgid; + INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); + new_pqd_entry->parent_tag_data = utd_entry; + utd_entry->num_pqd++; + + proc_qtu_data_tree_insert(new_pqd_entry, + &proc_qtu_data_tree); + + spin_unlock_bh(&uid_tag_data_tree_lock); + DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", + current_fsuid(), new_pqd_entry); + file->private_data = new_pqd_entry; + return 0; + +err_unlock_free_utd: + if (!utd_entry_found) { + rb_erase(&utd_entry->node, &uid_tag_data_tree); + kfree(utd_entry); + } + spin_unlock_bh(&uid_tag_data_tree_lock); +err: + return res; +} + +static int qtudev_release(struct inode *inode, struct file *file) +{ + struct proc_qtu_data *pqd_entry = file->private_data; + struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data; + struct sock_tag *st_entry; + struct rb_root st_to_free_tree = RB_ROOT; + struct list_head *entry, *next; + struct tag_ref *tr; + + if (unlikely(qtu_proc_handling_passive)) + return 0; + + /* + * Do not trust the current->pid, it might just be a kworker cleaning + * up after a dead proc. + */ + DR_DEBUG("qtaguid: qtudev_release(): " + "pid=%u tgid=%u uid=%u " + "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", + current->pid, current->tgid, pqd_entry->parent_tag_data->uid, + pqd_entry, pqd_entry->pid, utd_entry, + utd_entry->num_active_tags); + + spin_lock_bh(&sock_tag_list_lock); + spin_lock_bh(&uid_tag_data_tree_lock); + + list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { + st_entry = list_entry(entry, struct sock_tag, list); + DR_DEBUG("qtaguid: %s(): " + "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", + __func__, + st_entry, st_entry->sk, + current->pid, current->tgid, + pqd_entry->parent_tag_data->uid); + + utd_entry = uid_tag_data_tree_search( + &uid_tag_data_tree, + get_uid_from_tag(st_entry->tag)); + BUG_ON(IS_ERR_OR_NULL(utd_entry)); + DR_DEBUG("qtaguid: %s(): " + "looking for tag=0x%llx in utd_entry=%p\n", __func__, + st_entry->tag, utd_entry); + tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, + st_entry->tag); + BUG_ON(!tr); + BUG_ON(tr->num_sock_tags <= 0); + tr->num_sock_tags--; + free_tag_ref_from_utd_entry(tr, utd_entry); + + rb_erase(&st_entry->sock_node, &sock_tag_tree); + list_del(&st_entry->list); + /* Can't sockfd_put() within spinlock, do it later. */ + sock_tag_tree_insert(st_entry, &st_to_free_tree); + + /* + * Try to free the utd_entry if no other proc_qtu_data is + * using it (num_pqd is 0) and it doesn't have active tags + * (num_active_tags is 0). + */ + put_utd_entry(utd_entry); + } + + rb_erase(&pqd_entry->node, &proc_qtu_data_tree); + BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); + pqd_entry->parent_tag_data->num_pqd--; + put_utd_entry(pqd_entry->parent_tag_data); + kfree(pqd_entry); + file->private_data = NULL; + + spin_unlock_bh(&uid_tag_data_tree_lock); + spin_unlock_bh(&sock_tag_list_lock); + + + sock_tag_tree_erase(&st_to_free_tree); + + prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, + current->pid, current->tgid); + return 0; +} + +/*------------------------------------------*/ +static const struct file_operations qtudev_fops = { + .owner = THIS_MODULE, + .open = qtudev_open, + .release = qtudev_release, +}; + +static struct miscdevice qtu_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = QTU_DEV_NAME, + .fops = &qtudev_fops, + /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ +}; + +/*------------------------------------------*/ +static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) +{ + int ret; + *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); + if (!*res_procdir) { + pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); + ret = -ENOMEM; + goto no_dir; + } + + xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, + *res_procdir); + if (!xt_qtaguid_ctrl_file) { + pr_err("qtaguid: failed to create xt_qtaguid/ctrl " + " file\n"); + ret = -ENOMEM; + goto no_ctrl_entry; + } + xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; + xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; + + xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, + *res_procdir); + if (!xt_qtaguid_stats_file) { + pr_err("qtaguid: failed to create xt_qtaguid/stats " + "file\n"); + ret = -ENOMEM; + goto no_stats_entry; + } + xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; + /* + * TODO: add support counter hacking + * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; + */ + return 0; + +no_stats_entry: + remove_proc_entry("ctrl", *res_procdir); +no_ctrl_entry: + remove_proc_entry("xt_qtaguid", NULL); +no_dir: + return ret; +} + +static struct xt_match qtaguid_mt_reg __read_mostly = { + /* + * This module masquerades as the "owner" module so that iptables + * tools can deal with it. + */ + .name = "owner", + .revision = 1, + .family = NFPROTO_UNSPEC, + .match = qtaguid_mt, + .matchsize = sizeof(struct xt_qtaguid_match_info), + .me = THIS_MODULE, +}; + +static int __init qtaguid_mt_init(void) +{ + if (qtaguid_proc_register(&xt_qtaguid_procdir) + || iface_stat_init(xt_qtaguid_procdir) + || xt_register_match(&qtaguid_mt_reg) + || misc_register(&qtu_device)) + return -1; + return 0; +} + +/* + * TODO: allow unloading of the module. + * For now stats are permanent. + * Kconfig forces'y/n' and never an 'm'. + */ + +module_init(qtaguid_mt_init); +MODULE_AUTHOR("jpa "); +MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_owner"); +MODULE_ALIAS("ip6t_owner"); +MODULE_ALIAS("ipt_qtaguid"); +MODULE_ALIAS("ip6t_qtaguid"); diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h new file mode 100644 index 000000000000..02479d6d317d --- /dev/null +++ b/net/netfilter/xt_qtaguid_internal.h @@ -0,0 +1,330 @@ +/* + * Kernel iptables module to track stats for packets based on user tags. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __XT_QTAGUID_INTERNAL_H__ +#define __XT_QTAGUID_INTERNAL_H__ + +#include +#include +#include +#include + +/* Iface handling */ +#define IDEBUG_MASK (1<<0) +/* Iptable Matching. Per packet. */ +#define MDEBUG_MASK (1<<1) +/* Red-black tree handling. Per packet. */ +#define RDEBUG_MASK (1<<2) +/* procfs ctrl/stats handling */ +#define CDEBUG_MASK (1<<3) +/* dev and resource tracking */ +#define DDEBUG_MASK (1<<4) + +/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ +#define DEFAULT_DEBUG_MASK 0 + +/* + * (Un)Define these *DEBUG to compile out/in the pr_debug calls. + * All undef: text size ~ 0x3030; all def: ~ 0x4404. + */ +#define IDEBUG +#define MDEBUG +#define RDEBUG +#define CDEBUG +#define DDEBUG + +#define MSK_DEBUG(mask, ...) do { \ + if (unlikely(qtaguid_debug_mask & (mask))) \ + pr_debug(__VA_ARGS__); \ + } while (0) +#ifdef IDEBUG +#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) +#else +#define IF_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef MDEBUG +#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) +#else +#define MT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef RDEBUG +#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) +#else +#define RB_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef CDEBUG +#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) +#else +#define CT_DEBUG(...) no_printk(__VA_ARGS__) +#endif +#ifdef DDEBUG +#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) +#else +#define DR_DEBUG(...) no_printk(__VA_ARGS__) +#endif + +extern uint qtaguid_debug_mask; + +/*---------------------------------------------------------------------------*/ +/* + * Tags: + * + * They represent what the data usage counters will be tracked against. + * By default a tag is just based on the UID. + * The UID is used as the base for policing, and can not be ignored. + * So a tag will always at least represent a UID (uid_tag). + * + * A tag can be augmented with an "accounting tag" which is associated + * with a UID. + * User space can set the acct_tag portion of the tag which is then used + * with sockets: all data belonging to that socket will be counted against the + * tag. The policing is then based on the tag's uid_tag portion, + * and stats are collected for the acct_tag portion separately. + * + * There could be + * a: {acct_tag=1, uid_tag=10003} + * b: {acct_tag=2, uid_tag=10003} + * c: {acct_tag=3, uid_tag=10003} + * d: {acct_tag=0, uid_tag=10003} + * a, b, and c represent tags associated with specific sockets. + * d is for the totals for that uid, including all untagged traffic. + * Typically d is used with policing/quota rules. + * + * We want tag_t big enough to distinguish uid_t and acct_tag. + * It might become a struct if needed. + * Nothing should be using it as an int. + */ +typedef uint64_t tag_t; /* Only used via accessors */ + +#define TAG_UID_MASK 0xFFFFFFFFULL +#define TAG_ACCT_MASK (~0xFFFFFFFFULL) + +static inline int tag_compare(tag_t t1, tag_t t2) +{ + return t1 < t2 ? -1 : t1 == t2 ? 0 : 1; +} + +static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) +{ + return acct_tag | uid; +} +static inline tag_t make_tag_from_uid(uid_t uid) +{ + return uid; +} +static inline uid_t get_uid_from_tag(tag_t tag) +{ + return tag & TAG_UID_MASK; +} +static inline tag_t get_utag_from_tag(tag_t tag) +{ + return tag & TAG_UID_MASK; +} +static inline tag_t get_atag_from_tag(tag_t tag) +{ + return tag & TAG_ACCT_MASK; +} + +static inline bool valid_atag(tag_t tag) +{ + return !(tag & TAG_UID_MASK); +} +static inline tag_t make_atag_from_value(uint32_t value) +{ + return (uint64_t)value << 32; +} +/*---------------------------------------------------------------------------*/ + +/* + * Maximum number of socket tags that a UID is allowed to have active. + * Multiple processes belonging to the same UID contribute towards this limit. + * Special UIDs that can impersonate a UID also contribute (e.g. download + * manager, ...) + */ +#define DEFAULT_MAX_SOCK_TAGS 1024 + +/* + * For now we only track 2 sets of counters. + * The default set is 0. + * Userspace can activate another set for a given uid being tracked. + */ +#define IFS_MAX_COUNTER_SETS 2 + +enum ifs_tx_rx { + IFS_TX, + IFS_RX, + IFS_MAX_DIRECTIONS +}; + +/* For now, TCP, UDP, the rest */ +enum ifs_proto { + IFS_TCP, + IFS_UDP, + IFS_PROTO_OTHER, + IFS_MAX_PROTOS +}; + +struct byte_packet_counters { + uint64_t bytes; + uint64_t packets; +}; + +struct data_counters { + struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; +}; + +/* Generic X based nodes used as a base for rb_tree ops */ +struct tag_node { + struct rb_node node; + tag_t tag; +}; + +struct tag_stat { + struct tag_node tn; + struct data_counters counters; + /* + * If this tag is acct_tag based, we need to count against the + * matching parent uid_tag. + */ + struct data_counters *parent_counters; +}; + +struct iface_stat { + struct list_head list; /* in iface_stat_list */ + char *ifname; + bool active; + /* net_dev is only valid for active iface_stat */ + struct net_device *net_dev; + + struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; + /* + * We keep the last_known, because some devices reset their counters + * just before NETDEV_UP, while some will reset just before + * NETDEV_REGISTER (which is more normal). + * So now, if the device didn't do a NETDEV_UNREGISTER and we see + * its current dev stats smaller that what was previously known, we + * assume an UNREGISTER and just use the last_known. + */ + struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; + /* last_known is usable when last_known_valid is true */ + bool last_known_valid; + + struct proc_dir_entry *proc_ptr; + + struct rb_root tag_stat_tree; + spinlock_t tag_stat_list_lock; +}; + +/* This is needed to create proc_dir_entries from atomic context. */ +struct iface_stat_work { + struct work_struct iface_work; + struct iface_stat *iface_entry; +}; + +/* + * Track tag that this socket is transferring data for, and not necessarily + * the uid that owns the socket. + * This is the tag against which tag_stat.counters will be billed. + * These structs need to be looked up by sock and pid. + */ +struct sock_tag { + struct rb_node sock_node; + struct sock *sk; /* Only used as a number, never dereferenced */ + /* The socket is needed for sockfd_put() */ + struct socket *socket; + /* Used to associate with a given pid */ + struct list_head list; /* in proc_qtu_data.sock_tag_list */ + pid_t pid; + + tag_t tag; +}; + +struct qtaguid_event_counts { + /* Various successful events */ + atomic64_t sockets_tagged; + atomic64_t sockets_untagged; + atomic64_t counter_set_changes; + atomic64_t delete_cmds; + atomic64_t iface_events; /* Number of NETDEV_* events handled */ + + atomic64_t match_calls; /* Number of times iptables called mt */ + /* + * match_found_sk_*: numbers related to the netfilter matching + * function finding a sock for the sk_buff. + * Total skbs processed is sum(match_found*). + */ + atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ + /* The connection tracker had or didn't have the sk. */ + atomic64_t match_found_sk_in_ct; + atomic64_t match_found_no_sk_in_ct; + /* + * No sk could be found. No apparent owner. Could happen with + * unsolicited traffic. + */ + atomic64_t match_no_sk; + /* + * The file ptr in the sk_socket wasn't there. + * This might happen for traffic while the socket is being closed. + */ + atomic64_t match_no_sk_file; +}; + +/* Track the set active_set for the given tag. */ +struct tag_counter_set { + struct tag_node tn; + int active_set; +}; + +/*----------------------------------------------*/ +/* + * The qtu uid data is used to track resources that are created directly or + * indirectly by processes (uid tracked). + * It is shared by the processes with the same uid. + * Some of the resource will be counted to prevent further rogue allocations, + * some will need freeing once the owner process (uid) exits. + */ +struct uid_tag_data { + struct rb_node node; + uid_t uid; + + /* + * For the uid, how many accounting tags have been set. + */ + int num_active_tags; + /* Track the number of proc_qtu_data that reference it */ + int num_pqd; + struct rb_root tag_ref_tree; + /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ +}; + +struct tag_ref { + struct tag_node tn; + + /* + * This tracks the number of active sockets that have a tag on them + * which matches this tag_ref.tn.tag. + * A tag ref can live on after the sockets are untagged. + * A tag ref can only be removed during a tag delete command. + */ + int num_sock_tags; +}; + +struct proc_qtu_data { + struct rb_node node; + pid_t pid; + + struct uid_tag_data *parent_tag_data; + + /* Tracks the sock_tags that need freeing upon this proc's death */ + struct list_head sock_tag_list; + /* No spinlock_t sock_tag_list_lock; use the global one. */ +}; + +/*----------------------------------------------*/ +#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c new file mode 100644 index 000000000000..39176785c91f --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.c @@ -0,0 +1,556 @@ +/* + * Pretty printing Support for iptables xt_qtaguid module. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Most of the functions in this file just waste time if DEBUG is not defined. + * The matching xt_qtaguid_print.h will static inline empty funcs if the needed + * debug flags ore not defined. + * Those funcs that fail to allocate memory will panic as there is no need to + * hobble allong just pretending to do the requested work. + */ + +#define DEBUG + +#include +#include +#include +#include +#include +#include + + +#include "xt_qtaguid_internal.h" +#include "xt_qtaguid_print.h" + +#ifdef DDEBUG + +static void _bug_on_err_or_null(void *ptr) +{ + if (IS_ERR_OR_NULL(ptr)) { + pr_err("qtaguid: kmalloc failed\n"); + BUG(); + } +} + +char *pp_tag_t(tag_t *tag) +{ + char *res; + + if (!tag) + res = kasprintf(GFP_ATOMIC, "tag_t@null{}"); + else + res = kasprintf(GFP_ATOMIC, + "tag_t@%p{tag=0x%llx, uid=%u}", + tag, *tag, get_uid_from_tag(*tag)); + _bug_on_err_or_null(res); + return res; +} + +char *pp_data_counters(struct data_counters *dc, bool showValues) +{ + char *res; + + if (!dc) + res = kasprintf(GFP_ATOMIC, "data_counters@null{}"); + else if (showValues) + res = kasprintf( + GFP_ATOMIC, "data_counters@%p{" + "set0{" + "rx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}, " + "tx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}}, " + "set1{" + "rx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}, " + "tx{" + "tcp{b=%llu, p=%llu}, " + "udp{b=%llu, p=%llu}," + "other{b=%llu, p=%llu}}}}", + dc, + dc->bpc[0][IFS_RX][IFS_TCP].bytes, + dc->bpc[0][IFS_RX][IFS_TCP].packets, + dc->bpc[0][IFS_RX][IFS_UDP].bytes, + dc->bpc[0][IFS_RX][IFS_UDP].packets, + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes, + dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets, + dc->bpc[0][IFS_TX][IFS_TCP].bytes, + dc->bpc[0][IFS_TX][IFS_TCP].packets, + dc->bpc[0][IFS_TX][IFS_UDP].bytes, + dc->bpc[0][IFS_TX][IFS_UDP].packets, + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes, + dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets, + dc->bpc[1][IFS_RX][IFS_TCP].bytes, + dc->bpc[1][IFS_RX][IFS_TCP].packets, + dc->bpc[1][IFS_RX][IFS_UDP].bytes, + dc->bpc[1][IFS_RX][IFS_UDP].packets, + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes, + dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets, + dc->bpc[1][IFS_TX][IFS_TCP].bytes, + dc->bpc[1][IFS_TX][IFS_TCP].packets, + dc->bpc[1][IFS_TX][IFS_UDP].bytes, + dc->bpc[1][IFS_TX][IFS_UDP].packets, + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes, + dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets); + else + res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc); + _bug_on_err_or_null(res); + return res; +} + +char *pp_tag_node(struct tag_node *tn) +{ + char *tag_str; + char *res; + + if (!tn) { + res = kasprintf(GFP_ATOMIC, "tag_node@null{}"); + _bug_on_err_or_null(res); + return res; + } + tag_str = pp_tag_t(&tn->tag); + res = kasprintf(GFP_ATOMIC, + "tag_node@%p{tag=%s}", + tn, tag_str); + _bug_on_err_or_null(res); + kfree(tag_str); + return res; +} + +char *pp_tag_ref(struct tag_ref *tr) +{ + char *tn_str; + char *res; + + if (!tr) { + res = kasprintf(GFP_ATOMIC, "tag_ref@null{}"); + _bug_on_err_or_null(res); + return res; + } + tn_str = pp_tag_node(&tr->tn); + res = kasprintf(GFP_ATOMIC, + "tag_ref@%p{%s, num_sock_tags=%d}", + tr, tn_str, tr->num_sock_tags); + _bug_on_err_or_null(res); + kfree(tn_str); + return res; +} + +char *pp_tag_stat(struct tag_stat *ts) +{ + char *tn_str; + char *counters_str; + char *parent_counters_str; + char *res; + + if (!ts) { + res = kasprintf(GFP_ATOMIC, "tag_stat@null{}"); + _bug_on_err_or_null(res); + return res; + } + tn_str = pp_tag_node(&ts->tn); + counters_str = pp_data_counters(&ts->counters, true); + parent_counters_str = pp_data_counters(ts->parent_counters, false); + res = kasprintf(GFP_ATOMIC, + "tag_stat@%p{%s, counters=%s, parent_counters=%s}", + ts, tn_str, counters_str, parent_counters_str); + _bug_on_err_or_null(res); + kfree(tn_str); + kfree(counters_str); + kfree(parent_counters_str); + return res; +} + +char *pp_iface_stat(struct iface_stat *is) +{ + char *res; + if (!is) + res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); + else + res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" + "list=list_head{...}, " + "ifname=%s, " + "total={rx={bytes=%llu, " + "packets=%llu}, " + "tx={bytes=%llu, " + "packets=%llu}}, " + "last_known_valid=%d, " + "last_known={rx={bytes=%llu, " + "packets=%llu}, " + "tx={bytes=%llu, " + "packets=%llu}}, " + "active=%d, " + "net_dev=%p, " + "proc_ptr=%p, " + "tag_stat_tree=rb_root{...}}", + is, + is->ifname, + is->totals[IFS_RX].bytes, + is->totals[IFS_RX].packets, + is->totals[IFS_TX].bytes, + is->totals[IFS_TX].packets, + is->last_known_valid, + is->last_known[IFS_RX].bytes, + is->last_known[IFS_RX].packets, + is->last_known[IFS_TX].bytes, + is->last_known[IFS_TX].packets, + is->active, + is->net_dev, + is->proc_ptr); + _bug_on_err_or_null(res); + return res; +} + +char *pp_sock_tag(struct sock_tag *st) +{ + char *tag_str; + char *res; + + if (!st) { + res = kasprintf(GFP_ATOMIC, "sock_tag@null{}"); + _bug_on_err_or_null(res); + return res; + } + tag_str = pp_tag_t(&st->tag); + res = kasprintf(GFP_ATOMIC, "sock_tag@%p{" + "sock_node=rb_node{...}, " + "sk=%p socket=%p (f_count=%lu), list=list_head{...}, " + "pid=%u, tag=%s}", + st, st->sk, st->socket, atomic_long_read( + &st->socket->file->f_count), + st->pid, tag_str); + _bug_on_err_or_null(res); + kfree(tag_str); + return res; +} + +char *pp_uid_tag_data(struct uid_tag_data *utd) +{ + char *res; + + if (!utd) + res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}"); + else + res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{" + "uid=%u, num_active_acct_tags=%d, " + "num_pqd=%d, " + "tag_node_tree=rb_root{...}, " + "proc_qtu_data_tree=rb_root{...}}", + utd, utd->uid, + utd->num_active_tags, utd->num_pqd); + _bug_on_err_or_null(res); + return res; +} + +char *pp_proc_qtu_data(struct proc_qtu_data *pqd) +{ + char *parent_tag_data_str; + char *res; + + if (!pqd) { + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}"); + _bug_on_err_or_null(res); + return res; + } + parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data); + res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{" + "node=rb_node{...}, pid=%u, " + "parent_tag_data=%s, " + "sock_tag_list=list_head{...}}", + pqd, pqd->pid, parent_tag_data_str + ); + _bug_on_err_or_null(res); + kfree(parent_tag_data_str); + return res; +} + +/*------------------------------------------*/ +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree) +{ + struct rb_node *node; + struct sock_tag *sock_tag_entry; + char *str; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(sock_tag_tree)) { + str = "sock_tag_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "sock_tag_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(sock_tag_tree); + node; + node = rb_next(node)) { + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + str = pp_sock_tag(sock_tag_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_sock_tag_list(int indent_level, + struct list_head *sock_tag_list) +{ + struct sock_tag *sock_tag_entry; + char *str; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (list_empty(sock_tag_list)) { + str = "sock_tag_list=list_head{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "sock_tag_list=list_head{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + list_for_each_entry(sock_tag_entry, sock_tag_list, list) { + str = pp_sock_tag(sock_tag_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree) +{ + char *str; + struct rb_node *node; + struct proc_qtu_data *proc_qtu_data_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(proc_qtu_data_tree)) { + str = "proc_qtu_data_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "proc_qtu_data_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(proc_qtu_data_tree); + node; + node = rb_next(node)) { + proc_qtu_data_entry = rb_entry(node, + struct proc_qtu_data, + node); + str = pp_proc_qtu_data(proc_qtu_data_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, + str); + kfree(str); + indent_level++; + prdebug_sock_tag_list(indent_level, + &proc_qtu_data_entry->sock_tag_list); + indent_level--; + + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) +{ + char *str; + struct rb_node *node; + struct tag_ref *tag_ref_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(tag_ref_tree)) { + str = "tag_ref_tree{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "tag_ref_tree{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(tag_ref_tree); + node; + node = rb_next(node)) { + tag_ref_entry = rb_entry(node, + struct tag_ref, + tn.node); + str = pp_tag_ref(tag_ref_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, + str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree) +{ + char *str; + struct rb_node *node; + struct uid_tag_data *uid_tag_data_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(uid_tag_data_tree)) { + str = "uid_tag_data_tree=rb_root{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "uid_tag_data_tree=rb_root{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(uid_tag_data_tree); + node; + node = rb_next(node)) { + uid_tag_data_entry = rb_entry(node, struct uid_tag_data, + node); + str = pp_uid_tag_data(uid_tag_data_entry); + pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); + kfree(str); + if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) { + indent_level++; + prdebug_tag_ref_tree(indent_level, + &uid_tag_data_entry->tag_ref_tree); + indent_level--; + } + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree) +{ + char *str; + struct rb_node *node; + struct tag_stat *ts_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (RB_EMPTY_ROOT(tag_stat_tree)) { + str = "tag_stat_tree{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "tag_stat_tree{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + for (node = rb_first(tag_stat_tree); + node; + node = rb_next(node)) { + ts_entry = rb_entry(node, struct tag_stat, tn.node); + str = pp_tag_stat(ts_entry); + pr_debug("%*d: %s\n", indent_level*2, indent_level, + str); + kfree(str); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list) +{ + char *str; + struct iface_stat *iface_entry; + + if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK)) + return; + + if (list_empty(iface_stat_list)) { + str = "iface_stat_list=list_head{}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + return; + } + + str = "iface_stat_list=list_head{"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + indent_level++; + list_for_each_entry(iface_entry, iface_stat_list, list) { + str = pp_iface_stat(iface_entry); + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); + kfree(str); + + spin_lock_bh(&iface_entry->tag_stat_list_lock); + if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) { + indent_level++; + prdebug_tag_stat_tree(indent_level, + &iface_entry->tag_stat_tree); + indent_level--; + } + spin_unlock_bh(&iface_entry->tag_stat_list_lock); + } + indent_level--; + str = "}"; + pr_debug("%*d: %s\n", indent_level*2, indent_level, str); +} + +#endif /* ifdef DDEBUG */ +/*------------------------------------------*/ +static const char * const netdev_event_strings[] = { + "netdev_unknown", + "NETDEV_UP", + "NETDEV_DOWN", + "NETDEV_REBOOT", + "NETDEV_CHANGE", + "NETDEV_REGISTER", + "NETDEV_UNREGISTER", + "NETDEV_CHANGEMTU", + "NETDEV_CHANGEADDR", + "NETDEV_GOING_DOWN", + "NETDEV_CHANGENAME", + "NETDEV_FEAT_CHANGE", + "NETDEV_BONDING_FAILOVER", + "NETDEV_PRE_UP", + "NETDEV_PRE_TYPE_CHANGE", + "NETDEV_POST_TYPE_CHANGE", + "NETDEV_POST_INIT", + "NETDEV_UNREGISTER_BATCH", + "NETDEV_RELEASE", + "NETDEV_NOTIFY_PEERS", + "NETDEV_JOIN", +}; + +const char *netdev_evt_str(int netdev_event) +{ + if (netdev_event < 0 + || netdev_event >= ARRAY_SIZE(netdev_event_strings)) + return "bad event num"; + return netdev_event_strings[netdev_event]; +} diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h new file mode 100644 index 000000000000..b63871a0be5a --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.h @@ -0,0 +1,120 @@ +/* + * Pretty printing Support for iptables xt_qtaguid module. + * + * (C) 2011 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __XT_QTAGUID_PRINT_H__ +#define __XT_QTAGUID_PRINT_H__ + +#include "xt_qtaguid_internal.h" + +#ifdef DDEBUG + +char *pp_tag_t(tag_t *tag); +char *pp_data_counters(struct data_counters *dc, bool showValues); +char *pp_tag_node(struct tag_node *tn); +char *pp_tag_ref(struct tag_ref *tr); +char *pp_tag_stat(struct tag_stat *ts); +char *pp_iface_stat(struct iface_stat *is); +char *pp_sock_tag(struct sock_tag *st); +char *pp_uid_tag_data(struct uid_tag_data *qtd); +char *pp_proc_qtu_data(struct proc_qtu_data *pqd); + +/*------------------------------------------*/ +void prdebug_sock_tag_list(int indent_level, + struct list_head *sock_tag_list); +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree); +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree); +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree); +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree); +void prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree); +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list); + +#else + +/*------------------------------------------*/ +static inline char *pp_tag_t(tag_t *tag) +{ + return NULL; +} +static inline char *pp_data_counters(struct data_counters *dc, bool showValues) +{ + return NULL; +} +static inline char *pp_tag_node(struct tag_node *tn) +{ + return NULL; +} +static inline char *pp_tag_ref(struct tag_ref *tr) +{ + return NULL; +} +static inline char *pp_tag_stat(struct tag_stat *ts) +{ + return NULL; +} +static inline char *pp_iface_stat(struct iface_stat *is) +{ + return NULL; +} +static inline char *pp_sock_tag(struct sock_tag *st) +{ + return NULL; +} +static inline char *pp_uid_tag_data(struct uid_tag_data *qtd) +{ + return NULL; +} +static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd) +{ + return NULL; +} + +/*------------------------------------------*/ +static inline +void prdebug_sock_tag_list(int indent_level, + struct list_head *sock_tag_list) +{ +} +static inline +void prdebug_sock_tag_tree(int indent_level, + struct rb_root *sock_tag_tree) +{ +} +static inline +void prdebug_proc_qtu_data_tree(int indent_level, + struct rb_root *proc_qtu_data_tree) +{ +} +static inline +void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) +{ +} +static inline +void prdebug_uid_tag_data_tree(int indent_level, + struct rb_root *uid_tag_data_tree) +{ +} +static inline +void prdebug_tag_stat_tree(int indent_level, + struct rb_root *tag_stat_tree) +{ +} +static inline +void prdebug_iface_stat_list(int indent_level, + struct list_head *iface_stat_list) +{ +} +#endif +/*------------------------------------------*/ +const char *netdev_evt_str(int netdev_event); +#endif /* ifndef __XT_QTAGUID_PRINT_H__ */ -- GitLab From b702927cddbf3e98cb49410a23f2eb824d33a9cd Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 21 Jun 2011 11:14:49 -0700 Subject: [PATCH 0640/1442] ANDROID: netfilter: adding the original quota2 from xtables-addons The original xt_quota in the kernel is plain broken: - counts quota at a per CPU level (was written back when ubiquitous SMP was just a dream) - provides no way to count across IPV4/IPV6. This patch is the original unaltered code from: http://sourceforge.net/projects/xtables-addons at commit e84391ce665cef046967f796dd91026851d6bbf3 Change-Id: I19d49858840effee9ecf6cff03c23b45a97efdeb Signed-off-by: JP Abgrall --- include/linux/netfilter/xt_quota2.h | 25 +++ net/netfilter/xt_quota2.c | 274 ++++++++++++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 include/linux/netfilter/xt_quota2.h create mode 100644 net/netfilter/xt_quota2.c diff --git a/include/linux/netfilter/xt_quota2.h b/include/linux/netfilter/xt_quota2.h new file mode 100644 index 000000000000..eadc6903314e --- /dev/null +++ b/include/linux/netfilter/xt_quota2.h @@ -0,0 +1,25 @@ +#ifndef _XT_QUOTA_H +#define _XT_QUOTA_H + +enum xt_quota_flags { + XT_QUOTA_INVERT = 1 << 0, + XT_QUOTA_GROW = 1 << 1, + XT_QUOTA_PACKET = 1 << 2, + XT_QUOTA_NO_CHANGE = 1 << 3, + XT_QUOTA_MASK = 0x0F, +}; + +struct xt_quota_counter; + +struct xt_quota_mtinfo2 { + char name[15]; + u_int8_t flags; + + /* Comparison-invariant */ + aligned_u64 quota; + + /* Used internally by the kernel */ + struct xt_quota_counter *master __attribute__((aligned(8))); +}; + +#endif /* _XT_QUOTA_H */ diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c new file mode 100644 index 000000000000..4857008f1eb0 --- /dev/null +++ b/net/netfilter/xt_quota2.c @@ -0,0 +1,274 @@ +/* + * xt_quota2 - enhanced xt_quota that can count upwards and in packets + * as a minimal accounting match. + * by Jan Engelhardt , 2008 + * + * Originally based on xt_quota.c: + * netfilter module to enforce network quotas + * Sam Johnston + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License; either + * version 2 of the License, as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +#include +#include "xt_quota2.h" +#include "compat_xtables.h" + +/** + * @lock: lock to protect quota writers from each other + */ +struct xt_quota_counter { + u_int64_t quota; + spinlock_t lock; + struct list_head list; + atomic_t ref; + char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)]; + struct proc_dir_entry *procfs_entry; +}; + +static LIST_HEAD(counter_list); +static DEFINE_SPINLOCK(counter_list_lock); + +static struct proc_dir_entry *proc_xt_quota; +static unsigned int quota_list_perms = S_IRUGO | S_IWUSR; +static unsigned int quota_list_uid = 0; +static unsigned int quota_list_gid = 0; +module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR); +module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR); +module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR); + +static int quota_proc_read(char *page, char **start, off_t offset, + int count, int *eof, void *data) +{ + struct xt_quota_counter *e = data; + int ret; + + spin_lock_bh(&e->lock); + ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota); + spin_unlock_bh(&e->lock); + return ret; +} + +static int quota_proc_write(struct file *file, const char __user *input, + unsigned long size, void *data) +{ + struct xt_quota_counter *e = data; + char buf[sizeof("18446744073709551616")]; + + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, input, size) != 0) + return -EFAULT; + buf[sizeof(buf)-1] = '\0'; + + spin_lock_bh(&e->lock); + e->quota = simple_strtoull(buf, NULL, 0); + spin_unlock_bh(&e->lock); + return size; +} + +static struct xt_quota_counter * +q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon) +{ + struct xt_quota_counter *e; + unsigned int size; + + /* Do not need all the procfs things for anonymous counters. */ + size = anon ? offsetof(typeof(*e), list) : sizeof(*e); + e = kmalloc(size, GFP_KERNEL); + if (e == NULL) + return NULL; + + e->quota = q->quota; + spin_lock_init(&e->lock); + if (!anon) { + INIT_LIST_HEAD(&e->list); + atomic_set(&e->ref, 1); + strncpy(e->name, q->name, sizeof(e->name)); + } + return e; +} + +/** + * q2_get_counter - get ref to counter or create new + * @name: name of counter + */ +static struct xt_quota_counter * +q2_get_counter(const struct xt_quota_mtinfo2 *q) +{ + struct proc_dir_entry *p; + struct xt_quota_counter *e; + + if (*q->name == '\0') + return q2_new_counter(q, true); + + spin_lock_bh(&counter_list_lock); + list_for_each_entry(e, &counter_list, list) + if (strcmp(e->name, q->name) == 0) { + atomic_inc(&e->ref); + spin_unlock_bh(&counter_list_lock); + return e; + } + + e = q2_new_counter(q, false); + if (e == NULL) + goto out; + + p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms, + proc_xt_quota); + if (p == NULL || IS_ERR(p)) + goto out; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 29) + p->owner = THIS_MODULE; +#endif + p->data = e; + p->read_proc = quota_proc_read; + p->write_proc = quota_proc_write; + p->uid = quota_list_uid; + p->gid = quota_list_gid; + list_add_tail(&e->list, &counter_list); + spin_unlock_bh(&counter_list_lock); + return e; + + out: + spin_unlock_bh(&counter_list_lock); + kfree(e); + return NULL; +} + +static int quota_mt2_check(const struct xt_mtchk_param *par) +{ + struct xt_quota_mtinfo2 *q = par->matchinfo; + + if (q->flags & ~XT_QUOTA_MASK) + return -EINVAL; + + q->name[sizeof(q->name)-1] = '\0'; + if (*q->name == '.' || strchr(q->name, '/') != NULL) { + printk(KERN_ERR "xt_quota.3: illegal name\n"); + return -EINVAL; + } + + q->master = q2_get_counter(q); + if (q->master == NULL) { + printk(KERN_ERR "xt_quota.3: memory alloc failure\n"); + return -ENOMEM; + } + + return 0; +} + +static void quota_mt2_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_quota_mtinfo2 *q = par->matchinfo; + struct xt_quota_counter *e = q->master; + + if (*q->name == '\0') { + kfree(e); + return; + } + + spin_lock_bh(&counter_list_lock); + if (!atomic_dec_and_test(&e->ref)) { + spin_unlock_bh(&counter_list_lock); + return; + } + + list_del(&e->list); + remove_proc_entry(e->name, proc_xt_quota); + spin_unlock_bh(&counter_list_lock); + kfree(e); +} + +static bool +quota_mt2(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct xt_quota_mtinfo2 *q = (void *)par->matchinfo; + struct xt_quota_counter *e = q->master; + bool ret = q->flags & XT_QUOTA_INVERT; + + spin_lock_bh(&e->lock); + if (q->flags & XT_QUOTA_GROW) { + /* + * While no_change is pointless in "grow" mode, we will + * implement it here simply to have a consistent behavior. + */ + if (!(q->flags & XT_QUOTA_NO_CHANGE)) { + e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; + q->quota = e->quota; + } + ret = true; + } else { + if (e->quota >= skb->len) { + if (!(q->flags & XT_QUOTA_NO_CHANGE)) + e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; + ret = !ret; + } else { + /* we do not allow even small packets from now on */ + e->quota = 0; + } + q->quota = e->quota; + } + spin_unlock_bh(&e->lock); + return ret; +} + +static struct xt_match quota_mt2_reg[] __read_mostly = { + { + .name = "quota2", + .revision = 3, + .family = NFPROTO_IPV4, + .checkentry = quota_mt2_check, + .match = quota_mt2, + .destroy = quota_mt2_destroy, + .matchsize = sizeof(struct xt_quota_mtinfo2), + .me = THIS_MODULE, + }, + { + .name = "quota2", + .revision = 3, + .family = NFPROTO_IPV6, + .checkentry = quota_mt2_check, + .match = quota_mt2, + .destroy = quota_mt2_destroy, + .matchsize = sizeof(struct xt_quota_mtinfo2), + .me = THIS_MODULE, + }, +}; + +static int __init quota_mt2_init(void) +{ + int ret; + + proc_xt_quota = proc_mkdir("xt_quota", init_net__proc_net); + if (proc_xt_quota == NULL) + return -EACCES; + + ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); + if (ret < 0) + remove_proc_entry("xt_quota", init_net__proc_net); + return ret; +} + +static void __exit quota_mt2_exit(void) +{ + xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); + remove_proc_entry("xt_quota", init_net__proc_net); +} + +module_init(quota_mt2_init); +module_exit(quota_mt2_exit); +MODULE_DESCRIPTION("Xtables: countdown quota match; up counter"); +MODULE_AUTHOR("Sam Johnston "); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_quota2"); +MODULE_ALIAS("ip6t_quota2"); -- GitLab From 8f4eba2fbe469e8e20a556257433466312ef74d4 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 12 Jul 2011 12:02:59 -0700 Subject: [PATCH 0641/1442] ANDROID: netfilter: fixup the quota2, and enable. The xt_quota2 came from http://sourceforge.net/projects/xtables-addons/develop It needed tweaking for it to compile within the kernel tree. Fixed kmalloc() and create_proc_entry() invocations within a non-interruptible context. Removed useless copying of current quota back to the iptable's struct matchinfo: - those are per CPU: they will change randomly based on which cpu gets to update the value. - they prevent matching a rule: e.g. -A chain -m quota2 --name q1 --quota 123 can't be followed by -D chain -m quota2 --name q1 --quota 123 as the 123 will be compared to the struct matchinfo's quota member. Use the NETLINK NETLINK_NFLOG family to log a single message when the quota limit is reached. It uses the same packet type as ipt_ULOG, but - never copies skb data, - uses 112 as the event number (ULOG's +1) It doesn't log if the module param "event_num" is 0. Change-Id: I021d3b743db3b22158cc49acb5c94d905b501492 Signed-off-by: JP Abgrall --- net/netfilter/Kconfig | 24 ++++++ net/netfilter/Makefile | 1 + net/netfilter/xt_quota2.c | 154 ++++++++++++++++++++++++++++++++------ 3 files changed, 156 insertions(+), 23 deletions(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8945065a89b8..99734fed536f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1377,6 +1377,30 @@ config NETFILTER_XT_MATCH_QUOTA If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_QUOTA2 + tristate '"quota2" match support' + depends on NETFILTER_ADVANCED + help + This option adds a `quota2' match, which allows to match on a + byte counter correctly and not per CPU. + It allows naming the quotas. + This is based on http://xtables-addons.git.sourceforge.net + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + +config NETFILTER_XT_MATCH_QUOTA2_LOG + bool '"quota2" Netfilter LOG support' + depends on NETFILTER_XT_MATCH_QUOTA2 + depends on IP_NF_TARGET_ULOG=n # not yes, not module, just no + default n + help + This option allows `quota2' to log ONCE when a quota limit + is passed. It logs via NETLINK using the NETLINK_NFLOG family. + It logs similarly to how ipt_ULOG would without data. + + If unsure, say `N'. + config NETFILTER_XT_MATCH_RATEEST tristate '"rateest" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 59ceaa8ec204..54ba5aa1f9bf 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -169,6 +169,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c index 4857008f1eb0..aace72928530 100644 --- a/net/netfilter/xt_quota2.c +++ b/net/netfilter/xt_quota2.c @@ -12,14 +12,18 @@ * version 2 of the License, as published by the Free Software Foundation. */ #include +#include #include #include #include #include +#include #include -#include "xt_quota2.h" -#include "compat_xtables.h" +#include +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +#include +#endif /** * @lock: lock to protect quota writers from each other @@ -33,6 +37,16 @@ struct xt_quota_counter { struct proc_dir_entry *procfs_entry; }; +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +/* Harald's favorite number +1 :D From ipt_ULOG.C */ +static int qlog_nl_event = 112; +module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(event_num, + "Event number for NETLINK_NFLOG message. 0 disables log." + "111 is what ipt_ULOG uses."); +static struct sock *nflognl; +#endif + static LIST_HEAD(counter_list); static DEFINE_SPINLOCK(counter_list_lock); @@ -44,6 +58,70 @@ module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR); module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR); module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR); + +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG +static void quota2_log(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + ulog_packet_msg_t *pm; + struct sk_buff *log_skb; + size_t size; + struct nlmsghdr *nlh; + + if (!qlog_nl_event) + return; + + size = NLMSG_SPACE(sizeof(*pm)); + size = max(size, (size_t)NLMSG_GOODSIZE); + log_skb = alloc_skb(size, GFP_ATOMIC); + if (!log_skb) { + pr_err("xt_quota2: cannot alloc skb for logging\n"); + return; + } + + nlh = nlmsg_put(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event, + sizeof(*pm), 0); + if (!nlh) { + pr_err("xt_quota2: nlmsg_put failed\n"); + kfree_skb(log_skb); + return; + } + pm = nlmsg_data(nlh); + if (skb->tstamp.tv64 == 0) + __net_timestamp((struct sk_buff *)skb); + pm->data_len = 0; + pm->hook = hooknum; + if (prefix != NULL) + strlcpy(pm->prefix, prefix, sizeof(pm->prefix)); + else + *(pm->prefix) = '\0'; + if (in) + strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name)); + else + pm->indev_name[0] = '\0'; + + if (out) + strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); + else + pm->outdev_name[0] = '\0'; + + NETLINK_CB(log_skb).dst_group = 1; + pr_debug("throwing 1 packets to netlink group 1\n"); + netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC); +} +#else +static void quota2_log(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ +} +#endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */ + static int quota_proc_read(char *page, char **start, off_t offset, int count, int *eof, void *data) { @@ -91,7 +169,7 @@ q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon) if (!anon) { INIT_LIST_HEAD(&e->list); atomic_set(&e->ref, 1); - strncpy(e->name, q->name, sizeof(e->name)); + strlcpy(e->name, q->name, sizeof(e->name)); } return e; } @@ -104,42 +182,56 @@ static struct xt_quota_counter * q2_get_counter(const struct xt_quota_mtinfo2 *q) { struct proc_dir_entry *p; - struct xt_quota_counter *e; + struct xt_quota_counter *e = NULL; + struct xt_quota_counter *new_e; if (*q->name == '\0') return q2_new_counter(q, true); + /* No need to hold a lock while getting a new counter */ + new_e = q2_new_counter(q, false); + if (new_e == NULL) + goto out; + spin_lock_bh(&counter_list_lock); list_for_each_entry(e, &counter_list, list) if (strcmp(e->name, q->name) == 0) { atomic_inc(&e->ref); spin_unlock_bh(&counter_list_lock); + kfree(new_e); + pr_debug("xt_quota2: old counter name=%s", e->name); return e; } + e = new_e; + pr_debug("xt_quota2: new_counter name=%s", e->name); + list_add_tail(&e->list, &counter_list); + /* The entry having a refcount of 1 is not directly destructible. + * This func has not yet returned the new entry, thus iptables + * has not references for destroying this entry. + * For another rule to try to destroy it, it would 1st need for this + * func* to be re-invoked, acquire a new ref for the same named quota. + * Nobody will access the e->procfs_entry either. + * So release the lock. */ + spin_unlock_bh(&counter_list_lock); - e = q2_new_counter(q, false); - if (e == NULL) - goto out; - + /* create_proc_entry() is not spin_lock happy */ p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms, proc_xt_quota); - if (p == NULL || IS_ERR(p)) - goto out; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 29) - p->owner = THIS_MODULE; -#endif + if (IS_ERR_OR_NULL(p)) { + spin_lock_bh(&counter_list_lock); + list_del(&e->list); + spin_unlock_bh(&counter_list_lock); + goto out; + } p->data = e; p->read_proc = quota_proc_read; p->write_proc = quota_proc_write; p->uid = quota_list_uid; p->gid = quota_list_gid; - list_add_tail(&e->list, &counter_list); - spin_unlock_bh(&counter_list_lock); return e; out: - spin_unlock_bh(&counter_list_lock); kfree(e); return NULL; } @@ -148,6 +240,8 @@ static int quota_mt2_check(const struct xt_mtchk_param *par) { struct xt_quota_mtinfo2 *q = par->matchinfo; + pr_debug("xt_quota2: check() flags=0x%04x", q->flags); + if (q->flags & ~XT_QUOTA_MASK) return -EINVAL; @@ -203,7 +297,6 @@ quota_mt2(const struct sk_buff *skb, struct xt_action_param *par) */ if (!(q->flags & XT_QUOTA_NO_CHANGE)) { e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; - q->quota = e->quota; } ret = true; } else { @@ -212,10 +305,17 @@ quota_mt2(const struct sk_buff *skb, struct xt_action_param *par) e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; ret = !ret; } else { + /* We are transitioning, log that fact. */ + if (e->quota) { + quota2_log(par->hooknum, + skb, + par->in, + par->out, + q->name); + } /* we do not allow even small packets from now on */ e->quota = 0; } - q->quota = e->quota; } spin_unlock_bh(&e->lock); return ret; @@ -228,7 +328,7 @@ static struct xt_match quota_mt2_reg[] __read_mostly = { .family = NFPROTO_IPV4, .checkentry = quota_mt2_check, .match = quota_mt2, - .destroy = quota_mt2_destroy, + .destroy = quota_mt2_destroy, .matchsize = sizeof(struct xt_quota_mtinfo2), .me = THIS_MODULE, }, @@ -238,7 +338,7 @@ static struct xt_match quota_mt2_reg[] __read_mostly = { .family = NFPROTO_IPV6, .checkentry = quota_mt2_check, .match = quota_mt2, - .destroy = quota_mt2_destroy, + .destroy = quota_mt2_destroy, .matchsize = sizeof(struct xt_quota_mtinfo2), .me = THIS_MODULE, }, @@ -247,21 +347,29 @@ static struct xt_match quota_mt2_reg[] __read_mostly = { static int __init quota_mt2_init(void) { int ret; + pr_debug("xt_quota2: init()"); + +#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG + nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, NULL); + if (!nflognl) + return -ENOMEM; +#endif - proc_xt_quota = proc_mkdir("xt_quota", init_net__proc_net); + proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net); if (proc_xt_quota == NULL) return -EACCES; ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); if (ret < 0) - remove_proc_entry("xt_quota", init_net__proc_net); + remove_proc_entry("xt_quota", init_net.proc_net); + pr_debug("xt_quota2: init() %d", ret); return ret; } static void __exit quota_mt2_exit(void) { xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); - remove_proc_entry("xt_quota", init_net__proc_net); + remove_proc_entry("xt_quota", init_net.proc_net); } module_init(quota_mt2_init); -- GitLab From e5d798684a7156676f65e93837d46f71ab063417 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Fri, 13 Apr 2012 19:22:35 -0700 Subject: [PATCH 0642/1442] ANDROID: netfilter: qtaguid: initialize a local var to keep compiler happy. There was a case that might have seemed like new_tag_stat was not initialized and actually used. Added comment explaining why it was impossible, and a BUG() in case the logic gets changed. Change-Id: I1eddd1b6f754c08a3bf89f7e9427e5dce1dfb081 Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index b0a221806878..3d3928291efb 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1265,7 +1265,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, struct data_counters *uid_tag_counters; struct sock_tag *sock_tag_entry; struct iface_stat *iface_entry; - struct tag_stat *new_tag_stat; + struct tag_stat *new_tag_stat = NULL; MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", ifname, uid, sk, direction, proto, bytes); @@ -1330,8 +1330,19 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, } if (acct_tag) { + /* Create the child {acct_tag, uid_tag} and hook up parent. */ new_tag_stat = create_if_tag_stat(iface_entry, tag); new_tag_stat->parent_counters = uid_tag_counters; + } else { + /* + * For new_tag_stat to be still NULL here would require: + * {0, uid_tag} exists + * and {acct_tag, uid_tag} doesn't exist + * AND acct_tag == 0. + * Impossible. This reassures us that new_tag_stat + * below will always be assigned. + */ + BUG_ON(!new_tag_stat); } tag_stat_update(new_tag_stat, direction, proto, bytes); spin_unlock_bh(&iface_entry->tag_stat_list_lock); -- GitLab From 4bb20aa8e1339de1d5c8341b5ac8320cb769bb78 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 17 Apr 2012 16:00:07 -0700 Subject: [PATCH 0643/1442] ANDROID: netfilter: xt_qtaguid: fix ipv6 protocol lookup When updating the stats for a given uid it would incorrectly assume IPV4 and pick up the wrong protocol when IPV6. Change-Id: Iea4a635012b4123bf7aa93809011b7b2040bb3d5 Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 39 +++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 3d3928291efb..2c1170f89d0f 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -26,6 +26,10 @@ #include #include +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#include +#endif + #include #include "xt_qtaguid_internal.h" #include "xt_qtaguid_print.h" @@ -1546,6 +1550,27 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, return sk; } +static int ipx_proto(const struct sk_buff *skb, + struct xt_action_param *par) +{ + int thoff = 0, tproto; + + switch (par->family) { + case NFPROTO_IPV6: + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); + if (tproto < 0) + MT_DEBUG("%s(): transport header not found in ipv6" + " skb=%p\n", __func__, skb); + break; + case NFPROTO_IPV4: + tproto = ip_hdr(skb)->protocol; + break; + default: + tproto = IPPROTO_RAW; + } + return tproto; +} + static void account_for_uid(const struct sk_buff *skb, const struct sock *alternate_sk, uid_t uid, struct xt_action_param *par) @@ -1572,15 +1597,15 @@ static void account_for_uid(const struct sk_buff *skb, } else if (unlikely(!el_dev->name)) { pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); } else { - MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", - par->hooknum, - el_dev->name, - el_dev->type); + int proto = ipx_proto(skb, par); + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", + par->hooknum, el_dev->name, el_dev->type, + par->family, proto); if_tag_stat_update(el_dev->name, uid, skb->sk ? skb->sk : alternate_sk, par->in ? IFS_RX : IFS_TX, - ip_hdr(skb)->protocol, skb->len); + proto, skb->len); } } @@ -1625,8 +1650,8 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) } else { atomic64_inc(&qtu_events.match_found_sk); } - MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", - par->hooknum, sk, got_sock, ip_hdr(skb)->protocol); + MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n", + par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par)); if (sk != NULL) { MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", par->hooknum, sk, sk->sk_socket, -- GitLab From cf7ad6a249aaeb9df30ce6515101e2e17ec11781 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Thu, 26 Apr 2012 23:28:35 -0700 Subject: [PATCH 0644/1442] ANDROID: netfilter: xt_IDLETIMER: Add new netlink msg type Send notifications when the label becomes active after an idle period. Send netlink message notifications in addition to sysfs notifications. Using a uevent with subsystem=xt_idletimer INTERFACE=... STATE={active,inactive} This is backport from common android-3.0 commit: beb914e987cbbd368988d2b94a6661cb907c4d5a with uevent support instead of a new netlink message type. Change-Id: I31677ef00c94b5f82c8457e5bf9e5e584c23c523 Signed-off-by: Ashish Sharma Signed-off-by: JP Abgrall --- include/uapi/linux/netfilter/xt_IDLETIMER.h | 8 +++ net/netfilter/xt_IDLETIMER.c | 78 +++++++++++++++++++-- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h index 208ae9387331..faaa28b3d061 100644 --- a/include/uapi/linux/netfilter/xt_IDLETIMER.h +++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h @@ -4,6 +4,7 @@ * Header file for Xtables timer target module. * * Copyright (C) 2004, 2010 Nokia Corporation + * * Written by Timo Teras * * Converted to x_tables and forward-ported to 2.6.34 @@ -32,12 +33,19 @@ #include #define MAX_IDLETIMER_LABEL_SIZE 28 +#define NLMSG_MAX_SIZE 64 + +#define NL_EVENT_TYPE_INACTIVE 0 +#define NL_EVENT_TYPE_ACTIVE 1 struct idletimer_tg_info { __u32 timeout; char label[MAX_IDLETIMER_LABEL_SIZE]; + /* Use netlink messages for notification in addition to sysfs */ + __u8 send_nl_msg; + /* for kernel module internal use only */ struct idletimer_tg *timer __attribute__((aligned(8))); }; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index daf45da448fa..5320ff9179ac 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -5,6 +5,7 @@ * After timer expires a kevent will be sent. * * Copyright (C) 2004, 2010 Nokia Corporation + * * Written by Timo Teras * * Converted to x_tables and reworked for upstream inclusion @@ -38,8 +39,10 @@ #include #include #include +#include #include #include +#include struct idletimer_tg_attr { struct attribute attr; @@ -56,6 +59,8 @@ struct idletimer_tg { struct idletimer_tg_attr attr; unsigned int refcnt; + bool send_nl_msg; + bool active; }; static LIST_HEAD(idletimer_tg_list); @@ -63,6 +68,32 @@ static DEFINE_MUTEX(list_mutex); static struct kobject *idletimer_tg_kobj; +static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) +{ + char iface_msg[NLMSG_MAX_SIZE]; + char state_msg[NLMSG_MAX_SIZE]; + char *envp[] = { iface_msg, state_msg, NULL }; + int res; + + res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s", + iface); + if (NLMSG_MAX_SIZE <= res) { + pr_err("message too long (%d)", res); + return; + } + res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s", + timer->active ? "active" : "inactive"); + if (NLMSG_MAX_SIZE <= res) { + pr_err("message too long (%d)", res); + return; + } + pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); + kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); + return; + + +} + static struct idletimer_tg *__idletimer_tg_find_by_label(const char *label) { @@ -83,6 +114,7 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr, { struct idletimer_tg *timer; unsigned long expires = 0; + unsigned long now = jiffies; mutex_lock(&list_mutex); @@ -92,11 +124,15 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr, mutex_unlock(&list_mutex); - if (time_after(expires, jiffies)) + if (time_after(expires, now)) return sprintf(buf, "%u\n", - jiffies_to_msecs(expires - jiffies) / 1000); + jiffies_to_msecs(expires - now) / 1000); - return sprintf(buf, "0\n"); + if (timer->send_nl_msg) + return sprintf(buf, "0 %d\n", + jiffies_to_msecs(now - expires) / 1000); + else + return sprintf(buf, "0\n"); } static void idletimer_tg_work(struct work_struct *work) @@ -105,6 +141,9 @@ static void idletimer_tg_work(struct work_struct *work) work); sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name); + + if (timer->send_nl_msg) + notify_netlink_uevent(timer->attr.attr.name, timer); } static void idletimer_tg_expired(unsigned long data) @@ -113,6 +152,7 @@ static void idletimer_tg_expired(unsigned long data) pr_debug("timer %s expired\n", timer->attr.attr.name); + timer->active = false; schedule_work(&timer->work); } @@ -146,6 +186,8 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) setup_timer(&info->timer->timer, idletimer_tg_expired, (unsigned long) info->timer); info->timer->refcnt = 1; + info->timer->send_nl_msg = (info->send_nl_msg == 0) ? false : true; + info->timer->active = true; mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); @@ -169,14 +211,24 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, const struct xt_action_param *par) { const struct idletimer_tg_info *info = par->targinfo; + unsigned long now = jiffies; pr_debug("resetting timer %s, timeout period %u\n", info->label, info->timeout); BUG_ON(!info->timer); + info->timer->active = true; + + if (time_before(info->timer->timer.expires, now)) { + schedule_work(&info->timer->work); + pr_debug("Starting timer %s (Expired, Jiffies): %lu, %lu\n", + info->label, info->timer->timer.expires, now); + } + + /* TODO: Avoid modifying timers on each packet */ mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + msecs_to_jiffies(info->timeout * 1000) + now); return XT_CONTINUE; } @@ -185,8 +237,9 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) { struct idletimer_tg_info *info = par->targinfo; int ret; + unsigned long now = jiffies; - pr_debug("checkentry targinfo%s\n", info->label); + pr_debug("checkentry targinfo %s\n", info->label); if (info->timeout == 0) { pr_debug("timeout value is zero\n"); @@ -205,8 +258,16 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; + info->timer->active = true; + + if (time_before(info->timer->timer.expires, now)) { + schedule_work(&info->timer->work); + pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", + info->timer->timer.expires, now); + } + mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + msecs_to_jiffies(info->timeout * 1000) + now); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); @@ -220,6 +281,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) } mutex_unlock(&list_mutex); + return 0; } @@ -242,7 +304,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) kfree(info->timer); } else { pr_debug("decreased refcnt of timer %s to %u\n", - info->label, info->timer->refcnt); + info->label, info->timer->refcnt); } mutex_unlock(&list_mutex); @@ -250,6 +312,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) static struct xt_target idletimer_tg __read_mostly = { .name = "IDLETIMER", + .revision = 1, .family = NFPROTO_UNSPEC, .target = idletimer_tg_target, .targetsize = sizeof(struct idletimer_tg_info), @@ -315,3 +378,4 @@ MODULE_DESCRIPTION("Xtables: idle time monitor"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("ipt_IDLETIMER"); MODULE_ALIAS("ip6t_IDLETIMER"); +MODULE_ALIAS("arpt_IDLETIMER"); -- GitLab From 9e0858ce041e963abd4043e1e47d11d9e47d686e Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Fri, 27 Apr 2012 12:57:39 -0700 Subject: [PATCH 0645/1442] ANDROID: netfilter: xt_qtaguid: start tracking iface rx/tx at low level qtaguid tracks the device stats by monitoring when it goes up and down, then it gets the dev_stats(). But devs don't correctly report stats (either they don't count headers symmetrically between rx/tx, or they count internal control messages). Now qtaguid counts the rx/tx bytes/packets during raw:prerouting and mangle:postrouting (nat is not available in ipv6). The results are in /proc/net/xt_qtaguid/iface_stat_fmt which outputs a format line (bash expansion): ifname total_skb_{rx,tx}_{bytes,packets} Added event counters for pre/post handling. Added extra ctrl_*() pid/uid debugging. Change-Id: Id84345d544ad1dd5f63e3842cab229e71d339297 Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 277 ++++++++++++++++++++++------ net/netfilter/xt_qtaguid_internal.h | 5 +- net/netfilter/xt_qtaguid_print.c | 18 +- 3 files changed, 233 insertions(+), 67 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 2c1170f89d0f..9fd0ffa6c365 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -114,8 +114,15 @@ module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); /*---------------------------------------------------------------------------*/ static const char *iface_stat_procdirname = "iface_stat"; static struct proc_dir_entry *iface_stat_procdir; +/* + * The iface_stat_all* will go away once userspace gets use to the new fields + * that have a format line. + */ static const char *iface_stat_all_procfilename = "iface_stat_all"; static struct proc_dir_entry *iface_stat_all_procfile; +static const char *iface_stat_fmt_procfilename = "iface_stat_fmt"; +static struct proc_dir_entry *iface_stat_fmt_procfile; + /* * Ordering of locks: @@ -128,9 +135,9 @@ static struct proc_dir_entry *iface_stat_all_procfile; * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock * is acquired. * - * Call tree with all lock holders as of 2011-09-25: + * Call tree with all lock holders as of 2012-04-27: * - * iface_stat_all_proc_read() + * iface_stat_fmt_proc_read() * iface_stat_list_lock * (struct iface_stat) * @@ -781,13 +788,14 @@ static struct iface_stat *get_iface_entry(const char *ifname) return iface_entry; } -static int iface_stat_all_proc_read(char *page, char **num_items_returned, +static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, off_t items_to_skip, int char_count, int *eof, void *data) { char *outp = page; int item_index = 0; int len; + int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */ struct iface_stat *iface_entry; struct rtnl_link_stats64 dev_stats, *stats; struct rtnl_link_stats64 no_dev_stats = {0}; @@ -797,14 +805,32 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned, return 0; } - CT_DEBUG("qtaguid:proc iface_stat_all " + CT_DEBUG("qtaguid:proc iface_stat_fmt " + "pid=%u tgid=%u uid=%u " "page=%p *num_items_returned=%p off=%ld " - "char_count=%d *eof=%d\n", page, *num_items_returned, + "char_count=%d *eof=%d\n", + current->pid, current->tgid, current_fsuid(), + page, *num_items_returned, items_to_skip, char_count, *eof); if (*eof) return 0; + if (fmt == 2 && item_index++ >= items_to_skip) { + len = snprintf(outp, char_count, + "ifname " + "total_skb_rx_bytes total_skb_rx_packets " + "total_skb_tx_bytes total_skb_tx_packets\n" + ); + if (len >= char_count) { + *outp = '\0'; + return outp - page; + } + outp += len; + char_count -= len; + (*num_items_returned)++; + } + /* * This lock will prevent iface_stat_update() from changing active, * and in turn prevent an interface from unregistering itself. @@ -820,18 +846,37 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned, } else { stats = &no_dev_stats; } - len = snprintf(outp, char_count, - "%s %d " - "%llu %llu %llu %llu " - "%llu %llu %llu %llu\n", - iface_entry->ifname, - iface_entry->active, - iface_entry->totals[IFS_RX].bytes, - iface_entry->totals[IFS_RX].packets, - iface_entry->totals[IFS_TX].bytes, - iface_entry->totals[IFS_TX].packets, - stats->rx_bytes, stats->rx_packets, - stats->tx_bytes, stats->tx_packets); + /* + * If the meaning of the data changes, then update the fmtX + * string. + */ + if (fmt == 1) { + len = snprintf( + outp, char_count, + "%s %d " + "%llu %llu %llu %llu " + "%llu %llu %llu %llu\n", + iface_entry->ifname, + iface_entry->active, + iface_entry->totals_via_dev[IFS_RX].bytes, + iface_entry->totals_via_dev[IFS_RX].packets, + iface_entry->totals_via_dev[IFS_TX].bytes, + iface_entry->totals_via_dev[IFS_TX].packets, + stats->rx_bytes, stats->rx_packets, + stats->tx_bytes, stats->tx_packets + ); + } else { + len = snprintf( + outp, char_count, + "%s " + "%llu %llu %llu %llu\n", + iface_entry->ifname, + iface_entry->totals_via_skb[IFS_RX].bytes, + iface_entry->totals_via_skb[IFS_RX].packets, + iface_entry->totals_via_skb[IFS_TX].bytes, + iface_entry->totals_via_skb[IFS_TX].packets + ); + } if (len >= char_count) { spin_unlock_bh(&iface_stat_list_lock); *outp = '\0'; @@ -865,13 +910,17 @@ static void iface_create_proc_worker(struct work_struct *work) new_iface->proc_ptr = proc_entry; create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, - read_proc_u64, &new_iface->totals[IFS_TX].bytes); + read_proc_u64, + &new_iface->totals_via_dev[IFS_TX].bytes); create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, - read_proc_u64, &new_iface->totals[IFS_RX].bytes); + read_proc_u64, + &new_iface->totals_via_dev[IFS_RX].bytes); create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, - read_proc_u64, &new_iface->totals[IFS_TX].packets); + read_proc_u64, + &new_iface->totals_via_dev[IFS_TX].packets); create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, - read_proc_u64, &new_iface->totals[IFS_RX].packets); + read_proc_u64, + &new_iface->totals_via_dev[IFS_RX].packets); create_proc_read_entry("active", proc_iface_perms, proc_entry, read_proc_bool, &new_iface->active); @@ -975,11 +1024,13 @@ static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, "iface reset its stats unexpectedly\n", __func__, net_dev->name); - iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; - iface->totals[IFS_TX].packets += + iface->totals_via_dev[IFS_TX].bytes += + iface->last_known[IFS_TX].bytes; + iface->totals_via_dev[IFS_TX].packets += iface->last_known[IFS_TX].packets; - iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; - iface->totals[IFS_RX].packets += + iface->totals_via_dev[IFS_RX].bytes += + iface->last_known[IFS_RX].bytes; + iface->totals_via_dev[IFS_RX].packets += iface->last_known[IFS_RX].packets; iface->last_known_valid = false; IF_DEBUG("qtaguid: %s(%s): iface=%p " @@ -1147,6 +1198,27 @@ static struct sock_tag *get_sock_stat(const struct sock *sk) return sock_tag_entry; } +static int ipx_proto(const struct sk_buff *skb, + struct xt_action_param *par) +{ + int thoff = 0, tproto; + + switch (par->family) { + case NFPROTO_IPV6: + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); + if (tproto < 0) + MT_DEBUG("%s(): transport header not found in ipv6" + " skb=%p\n", __func__, skb); + break; + case NFPROTO_IPV4: + tproto = ip_hdr(skb)->protocol; + break; + default: + tproto = IPPROTO_RAW; + } + return tproto; +} + static void data_counters_update(struct data_counters *dc, int set, enum ifs_tx_rx direction, int proto, int bytes) @@ -1207,10 +1279,10 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only) spin_unlock_bh(&iface_stat_list_lock); return; } - entry->totals[IFS_TX].bytes += stats->tx_bytes; - entry->totals[IFS_TX].packets += stats->tx_packets; - entry->totals[IFS_RX].bytes += stats->rx_bytes; - entry->totals[IFS_RX].packets += stats->rx_packets; + entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes; + entry->totals_via_dev[IFS_TX].packets += stats->tx_packets; + entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes; + entry->totals_via_dev[IFS_RX].packets += stats->rx_packets; /* We don't need the last_known[] anymore */ entry->last_known_valid = false; _iface_stat_set_active(entry, net_dev, false); @@ -1220,6 +1292,67 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only) spin_unlock_bh(&iface_stat_list_lock); } +/* + * Update stats for the specified interface from the skb. + * Do nothing if the entry + * does not exist (when a device was never configured with an IP address). + * Called on each sk. + */ +static void iface_stat_update_from_skb(const struct sk_buff *skb, + struct xt_action_param *par) +{ + struct iface_stat *entry; + const struct net_device *el_dev; + enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX; + int bytes = skb->len; + + if (!skb->dev) { + MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); + el_dev = par->in ? : par->out; + } else { + const struct net_device *other_dev; + el_dev = skb->dev; + other_dev = par->in ? : par->out; + if (el_dev != other_dev) { + MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " + "par->(in/out)=%p %s\n", + par->hooknum, el_dev, el_dev->name, other_dev, + other_dev->name); + } + } + + if (unlikely(!el_dev)) { + pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n", + par->hooknum, __func__); + BUG(); + } else if (unlikely(!el_dev->name)) { + pr_err("qtaguid[%d]: %s(): no dev->name?!!\n", + par->hooknum, __func__); + BUG(); + } else { + int proto = ipx_proto(skb, par); + MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", + par->hooknum, el_dev->name, el_dev->type, + par->family, proto); + } + + spin_lock_bh(&iface_stat_list_lock); + entry = get_iface_entry(el_dev->name); + if (entry == NULL) { + IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n", + __func__, el_dev->name); + spin_unlock_bh(&iface_stat_list_lock); + return; + } + + IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, + el_dev->name, entry); + + entry->totals_via_skb[direction].bytes += bytes; + entry->totals_via_skb[direction].packets++; + spin_unlock_bh(&iface_stat_list_lock); +} + static void tag_stat_update(struct tag_stat *tag_entry, enum ifs_tx_rx direction, int proto, int bytes) { @@ -1467,18 +1600,31 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) parent_procdir); if (!iface_stat_all_procfile) { pr_err("qtaguid: iface_stat: init " - " failed to create stat_all proc entry\n"); + " failed to create stat_old proc entry\n"); err = -1; goto err_zap_entry; } - iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; + iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read; + iface_stat_all_procfile->data = (void *)1; /* fmt1 */ + + iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename, + proc_iface_perms, + parent_procdir); + if (!iface_stat_fmt_procfile) { + pr_err("qtaguid: iface_stat: init " + " failed to create stat_all proc entry\n"); + err = -1; + goto err_zap_all_stats_entry; + } + iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read; + iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */ err = register_netdevice_notifier(&iface_netdev_notifier_blk); if (err) { pr_err("qtaguid: iface_stat: init " "failed to register dev event handler\n"); - goto err_zap_all_stats_entry; + goto err_zap_all_stats_entries; } err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); if (err) { @@ -1499,6 +1645,8 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); err_unreg_nd: unregister_netdevice_notifier(&iface_netdev_notifier_blk); +err_zap_all_stats_entries: + remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir); err_zap_all_stats_entry: remove_proc_entry(iface_stat_all_procfilename, parent_procdir); err_zap_entry: @@ -1550,27 +1698,6 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, return sk; } -static int ipx_proto(const struct sk_buff *skb, - struct xt_action_param *par) -{ - int thoff = 0, tproto; - - switch (par->family) { - case NFPROTO_IPV6: - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) - MT_DEBUG("%s(): transport header not found in ipv6" - " skb=%p\n", __func__, skb); - break; - case NFPROTO_IPV4: - tproto = ip_hdr(skb)->protocol; - break; - default: - tproto = IPPROTO_RAW; - } - return tproto; -} - static void account_for_uid(const struct sk_buff *skb, const struct sock *alternate_sk, uid_t uid, struct xt_action_param *par) @@ -1630,8 +1757,22 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) goto ret_res; } - sk = skb->sk; + switch (par->hooknum) { + case NF_INET_PRE_ROUTING: + case NF_INET_POST_ROUTING: + atomic64_inc(&qtu_events.match_calls_prepost); + iface_stat_update_from_skb(skb, par); + /* + * We are done in pre/post. The skb will get processed + * further alter. + */ + res = (info->match ^ info->invert); + goto ret_res; + break; + /* default: Fall through and do UID releated work */ + } + sk = skb->sk; if (sk == NULL) { /* * A missing sk->sk_socket happens when packets are in-flight @@ -1806,8 +1947,10 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, if (*eof) return 0; - CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", - page, items_to_skip, char_count, *eof); + CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u " + "page=%p off=%ld char_count=%d *eof=%d\n", + current->pid, current->tgid, current_fsuid(), + page, items_to_skip, char_count, *eof); spin_lock_bh(&sock_tag_list_lock); for (node = rb_first(&sock_tag_tree); @@ -1851,6 +1994,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, "delete_cmds=%llu " "iface_events=%llu " "match_calls=%llu " + "match_calls_prepost=%llu " "match_found_sk=%llu " "match_found_sk_in_ct=%llu " "match_found_no_sk_in_ct=%llu " @@ -1862,6 +2006,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, atomic64_read(&qtu_events.delete_cmds), atomic64_read(&qtu_events.iface_events), atomic64_read(&qtu_events.match_calls), + atomic64_read(&qtu_events.match_calls_prepost), atomic64_read(&qtu_events.match_found_sk), atomic64_read(&qtu_events.match_found_sk_in_ct), atomic64_read( @@ -2135,7 +2280,9 @@ static int ctrl_cmd_tag(const char *input) el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ if (!el_socket) { pr_info("qtaguid: ctrl_tag(%s): failed to lookup" - " sock_fd=%d err=%d\n", input, sock_fd, res); + " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", + input, sock_fd, res, current->pid, current->tgid, + current_fsuid()); goto err; } CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", @@ -2280,7 +2427,9 @@ static int ctrl_cmd_untag(const char *input) el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ if (!el_socket) { pr_info("qtaguid: ctrl_untag(%s): failed to lookup" - " sock_fd=%d err=%d\n", input, sock_fd, res); + " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", + input, sock_fd, res, current->pid, current->tgid, + current_fsuid()); goto err; } CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", @@ -2356,6 +2505,9 @@ static int qtaguid_ctrl_parse(const char *input, int count) char cmd; int res; + CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n", + input, current->pid, current->tgid, current_fsuid()); + cmd = input[0]; /* Collect params for commands */ switch (cmd) { @@ -2532,9 +2684,12 @@ static int qtaguid_stats_proc_read(char *page, char **num_items_returned, return len; } - CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " - "char_count=%d *eof=%d\n", page, *num_items_returned, - items_to_skip, char_count, *eof); + CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u " + "page=%p *num_items_returned=%p off=%ld " + "char_count=%d *eof=%d\n", + current->pid, current->tgid, current_fsuid(), + page, *num_items_returned, + items_to_skip, char_count, *eof); if (*eof) return 0; diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h index 02479d6d317d..d79f8383abf4 100644 --- a/net/netfilter/xt_qtaguid_internal.h +++ b/net/netfilter/xt_qtaguid_internal.h @@ -202,7 +202,8 @@ struct iface_stat { /* net_dev is only valid for active iface_stat */ struct net_device *net_dev; - struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; + struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS]; + struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS]; /* * We keep the last_known, because some devices reset their counters * just before NETDEV_UP, while some will reset just before @@ -254,6 +255,8 @@ struct qtaguid_event_counts { atomic64_t iface_events; /* Number of NETDEV_* events handled */ atomic64_t match_calls; /* Number of times iptables called mt */ + /* Number of times iptables called mt from pre or post routing hooks */ + atomic64_t match_calls_prepost; /* * match_found_sk_*: numbers related to the netfilter matching * function finding a sock for the sk_buff. diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c index 39176785c91f..8cbd8e42bcc4 100644 --- a/net/netfilter/xt_qtaguid_print.c +++ b/net/netfilter/xt_qtaguid_print.c @@ -183,7 +183,11 @@ char *pp_iface_stat(struct iface_stat *is) res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" "list=list_head{...}, " "ifname=%s, " - "total={rx={bytes=%llu, " + "total_dev={rx={bytes=%llu, " + "packets=%llu}, " + "tx={bytes=%llu, " + "packets=%llu}}, " + "total_skb={rx={bytes=%llu, " "packets=%llu}, " "tx={bytes=%llu, " "packets=%llu}}, " @@ -198,10 +202,14 @@ char *pp_iface_stat(struct iface_stat *is) "tag_stat_tree=rb_root{...}}", is, is->ifname, - is->totals[IFS_RX].bytes, - is->totals[IFS_RX].packets, - is->totals[IFS_TX].bytes, - is->totals[IFS_TX].packets, + is->totals_via_dev[IFS_RX].bytes, + is->totals_via_dev[IFS_RX].packets, + is->totals_via_dev[IFS_TX].bytes, + is->totals_via_dev[IFS_TX].packets, + is->totals_via_skb[IFS_RX].bytes, + is->totals_via_skb[IFS_RX].packets, + is->totals_via_skb[IFS_TX].bytes, + is->totals_via_skb[IFS_TX].packets, is->last_known_valid, is->last_known[IFS_RX].bytes, is->last_known[IFS_RX].packets, -- GitLab From 4cf10b889245d84d5e19d031f258a64a79257878 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 28 Aug 2012 16:53:32 -0700 Subject: [PATCH 0646/1442] ANDROID: netfilter: xt_qtaguid: report only uid tags to non-privileged processes In the past, a process could only see its own stats (uid-based summary, and details). Now we allow any process to see other UIDs uid-based stats, but still hide the detailed stats. Change-Id: I7666961ed244ac1d9359c339b048799e5db9facc Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 9fd0ffa6c365..14b003da1423 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -2588,8 +2588,9 @@ static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) } else { tag_t tag = ppi->ts_entry->tn.tag; uid_t stat_uid = get_uid_from_tag(tag); - - if (!can_read_other_uid_stats(stat_uid)) { + /* Detailed tags are not available to everybody */ + if (get_atag_from_tag(tag) + && !can_read_other_uid_stats(stat_uid)) { CT_DEBUG("qtaguid: stats line: " "%s 0x%llx %u: insufficient priv " "from pid=%u tgid=%u uid=%u\n", -- GitLab From b79c36f1a8bdb641c76a78728894de87a756a67b Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Tue, 9 Oct 2012 20:38:21 -0700 Subject: [PATCH 0647/1442] ANDROID: netfilter: xt_qtaguid: fix error exit that would keep a spinlock. qtudev_open() could return with a uid_tag_data_tree_lock held when an kzalloc(..., GFP_ATOMIC) would fail. Very unlikely to get triggered AND survive the mayhem of running out of mem. Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 14b003da1423..6b22563a924f 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -2752,7 +2752,7 @@ static int qtudev_open(struct inode *inode, struct file *file) utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); if (IS_ERR_OR_NULL(utd_entry)) { res = PTR_ERR(utd_entry); - goto err; + goto err_unlock; } /* Look for existing PID based proc_data */ @@ -2794,8 +2794,8 @@ static int qtudev_open(struct inode *inode, struct file *file) rb_erase(&utd_entry->node, &uid_tag_data_tree); kfree(utd_entry); } +err_unlock: spin_unlock_bh(&uid_tag_data_tree_lock); -err: return res; } -- GitLab From b842ea513f70b4540d636adecded9f3d0804ad13 Mon Sep 17 00:00:00 2001 From: Pontus Fuchs Date: Mon, 19 Nov 2012 11:44:51 -0800 Subject: [PATCH 0648/1442] ANDROID: netfilter: qtaguid: Don't BUG_ON if create_if_tag_stat fails If create_if_tag_stat fails to allocate memory (GFP_ATOMIC) the following will happen: qtaguid: iface_stat: tag stat alloc failed ... kernel BUG at xt_qtaguid.c:1482! Signed-off-by: Pontus Fuchs --- net/netfilter/xt_qtaguid.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 6b22563a924f..603bdd206990 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1461,6 +1461,8 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats. */ new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); + if (!new_tag_stat) + goto unlock; uid_tag_counters = &new_tag_stat->counters; } else { uid_tag_counters = &tag_stat_entry->counters; @@ -1469,6 +1471,8 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, if (acct_tag) { /* Create the child {acct_tag, uid_tag} and hook up parent. */ new_tag_stat = create_if_tag_stat(iface_entry, tag); + if (!new_tag_stat) + goto unlock; new_tag_stat->parent_counters = uid_tag_counters; } else { /* @@ -1482,6 +1486,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, BUG_ON(!new_tag_stat); } tag_stat_update(new_tag_stat, direction, proto, bytes); +unlock: spin_unlock_bh(&iface_entry->tag_stat_list_lock); } -- GitLab From 90414bcc83c9a4f493e080150371f997ddac8cb7 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Fri, 4 Jan 2013 18:18:36 -0800 Subject: [PATCH 0649/1442] ANDROID: netfilter: xt_qtaguid: remove AID_* dependency for access control qtaguid limits what can be done with /ctrl and /stats based on group membership. This changes removes AID_NET_BW_STATS and AID_NET_BW_ACCT, and picks up the groups from the gid of the matching proc entry files. Signed-off-by: JP Abgrall Change-Id: I42e477adde78a12ed5eb58fbc0b277cdaadb6f94 --- net/netfilter/xt_qtaguid.c | 51 +++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 603bdd206990..923f1bdd02e8 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -53,25 +53,22 @@ static unsigned int proc_stats_perms = S_IRUGO; module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); static struct proc_dir_entry *xt_qtaguid_ctrl_file; -#ifdef CONFIG_ANDROID_PARANOID_NETWORK + +/* Everybody can write. But proc_ctrl_write_limited is true by default which + * limits what can be controlled. See the can_*() functions. + */ static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; -#else -static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; -#endif module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); -#ifdef CONFIG_ANDROID_PARANOID_NETWORK -#include -static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; -static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; -#else -/* 0 means, don't limit anybody */ -static gid_t proc_stats_readall_gid; -static gid_t proc_ctrl_write_gid; -#endif -module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, +/* Limited by default, so the gid of the ctrl and stats proc entries + * will limit what can be done. See the can_*() functions. + */ +static bool proc_stats_readall_limited = true; +static bool proc_ctrl_write_limited = true; + +module_param_named(stats_readall_limited, proc_stats_readall_limited, bool, S_IRUGO | S_IWUSR); -module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, +module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool, S_IRUGO | S_IWUSR); /* @@ -242,8 +239,9 @@ static struct qtaguid_event_counts qtu_events; static bool can_manipulate_uids(void) { /* root pwnd */ - return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) - || in_egroup_p(proc_ctrl_write_gid); + return in_egroup_p(xt_qtaguid_ctrl_file->gid) + || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited) + || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); } static bool can_impersonate_uid(uid_t uid) @@ -254,9 +252,10 @@ static bool can_impersonate_uid(uid_t uid) static bool can_read_other_uid_stats(uid_t uid) { /* root pwnd */ - return unlikely(!current_fsuid()) || uid == current_fsuid() - || unlikely(!proc_stats_readall_gid) - || in_egroup_p(proc_stats_readall_gid); + return in_egroup_p(xt_qtaguid_stats_file->gid) + || unlikely(!current_fsuid()) || uid == current_fsuid() + || unlikely(!proc_stats_readall_limited) + || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); } static inline void dc_add_byte_packets(struct data_counters *counters, int set, @@ -2302,11 +2301,12 @@ static int ctrl_cmd_tag(const char *input) } CT_DEBUG("qtaguid: ctrl_tag(%s): " "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " - "in_group=%d in_egroup=%d\n", + "ctrl.gid=%u in_group()=%d in_egroup()=%d\n", input, current->pid, current->tgid, current_uid(), current_euid(), current_fsuid(), - in_group_p(proc_ctrl_write_gid), - in_egroup_p(proc_ctrl_write_gid)); + xt_qtaguid_ctrl_file->gid, + in_group_p(xt_qtaguid_ctrl_file->gid), + in_egroup_p(xt_qtaguid_ctrl_file->gid)); if (argc < 4) { uid = current_fsuid(); } else if (!can_impersonate_uid(uid)) { @@ -2598,10 +2598,11 @@ static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) && !can_read_other_uid_stats(stat_uid)) { CT_DEBUG("qtaguid: stats line: " "%s 0x%llx %u: insufficient priv " - "from pid=%u tgid=%u uid=%u\n", + "from pid=%u tgid=%u uid=%u stats.gid=%u\n", ppi->iface_entry->ifname, get_atag_from_tag(tag), stat_uid, - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, current_fsuid(), + xt_qtaguid_stats_file->gid); return 0; } if (ppi->item_index++ < ppi->items_to_skip) -- GitLab From 87f93e81c7d7301dfff984426bdbdb6095be4acc Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Mon, 28 Jan 2013 16:50:44 -0800 Subject: [PATCH 0650/1442] ANDROID: netfilter: xt_qtaguid: extend iface stat to report protocols In the past the iface_stat_fmt would only show global bytes/packets for the skb-based numbers. For stall detection in userspace, distinguishing tcp vs other protocols makes it easier. Now we report ifname total_skb_rx_bytes total_skb_rx_packets total_skb_tx_bytes total_skb_tx_packets {rx,tx}_{tcp,udp,ohter}_{bytes,packets} Bug: 6818637 Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 90 +++++++++++++++++------------ net/netfilter/xt_qtaguid_internal.h | 21 ++++++- net/netfilter/xt_qtaguid_print.c | 14 +++-- 3 files changed, 82 insertions(+), 43 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 923f1bdd02e8..92e5f80bd8fa 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -268,24 +268,6 @@ static inline void dc_add_byte_packets(struct data_counters *counters, int set, counters->bpc[set][direction][ifs_proto].packets += packets; } -static inline uint64_t dc_sum_bytes(struct data_counters *counters, - int set, - enum ifs_tx_rx direction) -{ - return counters->bpc[set][direction][IFS_TCP].bytes - + counters->bpc[set][direction][IFS_UDP].bytes - + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; -} - -static inline uint64_t dc_sum_packets(struct data_counters *counters, - int set, - enum ifs_tx_rx direction) -{ - return counters->bpc[set][direction][IFS_TCP].packets - + counters->bpc[set][direction][IFS_UDP].packets - + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; -} - static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) { struct rb_node *node = root->rb_node; @@ -787,6 +769,53 @@ static struct iface_stat *get_iface_entry(const char *ifname) return iface_entry; } +/* This is for fmt2 only */ +static int pp_iface_stat_line(bool header, char *outp, + int char_count, struct iface_stat *iface_entry) +{ + int len; + if (header) { + len = snprintf(outp, char_count, + "ifname " + "total_skb_rx_bytes total_skb_rx_packets " + "total_skb_tx_bytes total_skb_tx_packets " + "rx_tcp_bytes rx_tcp_packets " + "rx_udp_bytes rx_udp_packets " + "rx_other_bytes rx_other_packets " + "tx_tcp_bytes tx_tcp_packets " + "tx_udp_bytes tx_udp_packets " + "tx_other_bytes tx_other_packets\n" + ); + } else { + struct data_counters *cnts; + int cnt_set = 0; /* We only use one set for the device */ + cnts = &iface_entry->totals_via_skb; + len = snprintf( + outp, char_count, + "%s " + "%llu %llu %llu %llu %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + iface_entry->ifname, + dc_sum_bytes(cnts, cnt_set, IFS_RX), + dc_sum_packets(cnts, cnt_set, IFS_RX), + dc_sum_bytes(cnts, cnt_set, IFS_TX), + dc_sum_packets(cnts, cnt_set, IFS_TX), + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); + } + return len; +} + static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, off_t items_to_skip, int char_count, int *eof, void *data) @@ -816,11 +845,7 @@ static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, return 0; if (fmt == 2 && item_index++ >= items_to_skip) { - len = snprintf(outp, char_count, - "ifname " - "total_skb_rx_bytes total_skb_rx_packets " - "total_skb_tx_bytes total_skb_tx_packets\n" - ); + len = pp_iface_stat_line(true, outp, char_count, NULL); if (len >= char_count) { *outp = '\0'; return outp - page; @@ -865,16 +890,8 @@ static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, stats->tx_bytes, stats->tx_packets ); } else { - len = snprintf( - outp, char_count, - "%s " - "%llu %llu %llu %llu\n", - iface_entry->ifname, - iface_entry->totals_via_skb[IFS_RX].bytes, - iface_entry->totals_via_skb[IFS_RX].packets, - iface_entry->totals_via_skb[IFS_TX].bytes, - iface_entry->totals_via_skb[IFS_TX].packets - ); + len = pp_iface_stat_line(false, outp, char_count, + iface_entry); } if (len >= char_count) { spin_unlock_bh(&iface_stat_list_lock); @@ -1304,6 +1321,7 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, const struct net_device *el_dev; enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX; int bytes = skb->len; + int proto; if (!skb->dev) { MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); @@ -1329,7 +1347,7 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, par->hooknum, __func__); BUG(); } else { - int proto = ipx_proto(skb, par); + proto = ipx_proto(skb, par); MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n", par->hooknum, el_dev->name, el_dev->type, par->family, proto); @@ -1347,8 +1365,8 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, el_dev->name, entry); - entry->totals_via_skb[direction].bytes += bytes; - entry->totals_via_skb[direction].packets++; + data_counters_update(&entry->totals_via_skb, 0, direction, proto, + bytes); spin_unlock_bh(&iface_stat_list_lock); } diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h index d79f8383abf4..6dc14a9c6889 100644 --- a/net/netfilter/xt_qtaguid_internal.h +++ b/net/netfilter/xt_qtaguid_internal.h @@ -179,6 +179,25 @@ struct data_counters { struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; }; +static inline uint64_t dc_sum_bytes(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].bytes + + counters->bpc[set][direction][IFS_UDP].bytes + + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; +} + +static inline uint64_t dc_sum_packets(struct data_counters *counters, + int set, + enum ifs_tx_rx direction) +{ + return counters->bpc[set][direction][IFS_TCP].packets + + counters->bpc[set][direction][IFS_UDP].packets + + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; +} + + /* Generic X based nodes used as a base for rb_tree ops */ struct tag_node { struct rb_node node; @@ -203,7 +222,7 @@ struct iface_stat { struct net_device *net_dev; struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS]; - struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS]; + struct data_counters totals_via_skb; /* * We keep the last_known, because some devices reset their counters * just before NETDEV_UP, while some will reset just before diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c index 8cbd8e42bcc4..f6a00a3520ed 100644 --- a/net/netfilter/xt_qtaguid_print.c +++ b/net/netfilter/xt_qtaguid_print.c @@ -177,9 +177,10 @@ char *pp_tag_stat(struct tag_stat *ts) char *pp_iface_stat(struct iface_stat *is) { char *res; - if (!is) + if (!is) { res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); - else + } else { + struct data_counters *cnts = &is->totals_via_skb; res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" "list=list_head{...}, " "ifname=%s, " @@ -206,10 +207,10 @@ char *pp_iface_stat(struct iface_stat *is) is->totals_via_dev[IFS_RX].packets, is->totals_via_dev[IFS_TX].bytes, is->totals_via_dev[IFS_TX].packets, - is->totals_via_skb[IFS_RX].bytes, - is->totals_via_skb[IFS_RX].packets, - is->totals_via_skb[IFS_TX].bytes, - is->totals_via_skb[IFS_TX].packets, + dc_sum_bytes(cnts, 0, IFS_RX), + dc_sum_packets(cnts, 0, IFS_RX), + dc_sum_bytes(cnts, 0, IFS_TX), + dc_sum_packets(cnts, 0, IFS_TX), is->last_known_valid, is->last_known[IFS_RX].bytes, is->last_known[IFS_RX].packets, @@ -218,6 +219,7 @@ char *pp_iface_stat(struct iface_stat *is) is->active, is->net_dev, is->proc_ptr); + } _bug_on_err_or_null(res); return res; } -- GitLab From cf47f368650424fee946e83053b5b743449ae446 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 6 Feb 2013 17:40:07 -0800 Subject: [PATCH 0651/1442] ANDROID: netfilter: xt_qtaguid: Allow tracking loopback In the past it would always ignore interfaces with loopback addresses. Now we just treat them like any other. This also helps with writing tests that check for the presence of the qtaguid module. Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 92e5f80bd8fa..992a6e044902 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1108,18 +1108,13 @@ static void iface_stat_create(struct net_device *net_dev, spin_lock_bh(&iface_stat_list_lock); entry = get_iface_entry(ifname); if (entry != NULL) { - bool activate = !ipv4_is_loopback(ipaddr); IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", ifname, entry); iface_check_stats_reset_and_adjust(net_dev, entry); - _iface_stat_set_active(entry, net_dev, activate); + _iface_stat_set_active(entry, net_dev, true); IF_DEBUG("qtaguid: %s(%s): " "tracking now %d on ip=%pI4\n", __func__, - entry->ifname, activate, &ipaddr); - goto done_unlock_put; - } else if (ipv4_is_loopback(ipaddr)) { - IF_DEBUG("qtaguid: iface_stat: create(%s): " - "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); + entry->ifname, true, &ipaddr); goto done_unlock_put; } @@ -1170,19 +1165,13 @@ static void iface_stat_create_ipv6(struct net_device *net_dev, spin_lock_bh(&iface_stat_list_lock); entry = get_iface_entry(ifname); if (entry != NULL) { - bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, ifname, entry); iface_check_stats_reset_and_adjust(net_dev, entry); - _iface_stat_set_active(entry, net_dev, activate); + _iface_stat_set_active(entry, net_dev, true); IF_DEBUG("qtaguid: %s(%s): " "tracking now %d on ip=%pI6c\n", __func__, - entry->ifname, activate, &ifa->addr); - goto done_unlock_put; - } else if (addr_type & IPV6_ADDR_LOOPBACK) { - IF_DEBUG("qtaguid: %s(%s): " - "ignore loopback dev. ip=%pI6c\n", __func__, - ifname, &ifa->addr); + entry->ifname, true, &ifa->addr); goto done_unlock_put; } -- GitLab From a2e371bea14bcdf19560b55eb86afba491d2a5ac Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Mon, 8 Apr 2013 15:09:26 -0700 Subject: [PATCH 0652/1442] ANDROID: netfilter: qtaguid: rate limit some of the printks Some of the printks are in the packet handling path. We now ratelimit the very unlikely errors to avoid kmsg spamming. Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 992a6e044902..4ec6d23876c5 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1328,12 +1329,12 @@ static void iface_stat_update_from_skb(const struct sk_buff *skb, } if (unlikely(!el_dev)) { - pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n", - par->hooknum, __func__); + pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n", + par->hooknum, __func__); BUG(); } else if (unlikely(!el_dev->name)) { - pr_err("qtaguid[%d]: %s(): no dev->name?!!\n", - par->hooknum, __func__); + pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n", + par->hooknum, __func__); BUG(); } else { proto = ipx_proto(skb, par); @@ -1416,8 +1417,8 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, iface_entry = get_iface_entry(ifname); if (!iface_entry) { - pr_err("qtaguid: iface_stat: stat_update() %s not found\n", - ifname); + pr_err_ratelimited("qtaguid: iface_stat: stat_update() " + "%s not found\n", ifname); return; } /* It is ok to process data when an iface_entry is inactive */ -- GitLab From eb6aba2a14b9429ed961912009d1e00e1d8fc661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 13 May 2013 20:42:46 -0700 Subject: [PATCH 0653/1442] ANDROID: netfilter: xt_quota2: 3.10 fixes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Stop using obsolete create_proc_entry api. - Use proc_set_user instead of directly accessing the private structure. Signed-off-by: Arve Hjønnevåg --- net/netfilter/xt_quota2.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c index aace72928530..44ebdcc75965 100644 --- a/net/netfilter/xt_quota2.c +++ b/net/netfilter/xt_quota2.c @@ -122,22 +122,23 @@ static void quota2_log(unsigned int hooknum, } #endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */ -static int quota_proc_read(char *page, char **start, off_t offset, - int count, int *eof, void *data) +static int quota_proc_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) { - struct xt_quota_counter *e = data; - int ret; + struct xt_quota_counter *e = PDE_DATA(file_inode(file)); + char tmp[24]; + size_t tmp_size; spin_lock_bh(&e->lock); - ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota); + tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", e->quota); spin_unlock_bh(&e->lock); - return ret; + return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size); } static int quota_proc_write(struct file *file, const char __user *input, - unsigned long size, void *data) + size_t size, loff_t *ppos) { - struct xt_quota_counter *e = data; + struct xt_quota_counter *e = PDE_DATA(file_inode(file)); char buf[sizeof("18446744073709551616")]; if (size > sizeof(buf)) @@ -152,6 +153,12 @@ static int quota_proc_write(struct file *file, const char __user *input, return size; } +static const struct file_operations q2_counter_fops = { + .read = quota_proc_read, + .write = quota_proc_write, + .llseek = default_llseek, +}; + static struct xt_quota_counter * q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon) { @@ -215,8 +222,8 @@ q2_get_counter(const struct xt_quota_mtinfo2 *q) spin_unlock_bh(&counter_list_lock); /* create_proc_entry() is not spin_lock happy */ - p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms, - proc_xt_quota); + p = e->procfs_entry = proc_create_data(e->name, quota_list_perms, + proc_xt_quota, &q2_counter_fops, e); if (IS_ERR_OR_NULL(p)) { spin_lock_bh(&counter_list_lock); @@ -224,11 +231,7 @@ q2_get_counter(const struct xt_quota_mtinfo2 *q) spin_unlock_bh(&counter_list_lock); goto out; } - p->data = e; - p->read_proc = quota_proc_read; - p->write_proc = quota_proc_write; - p->uid = quota_list_uid; - p->gid = quota_list_gid; + proc_set_user(p, quota_list_uid, quota_list_gid); return e; out: -- GitLab From 287076ee5ecb4047d65df176f4d40f6e8d0949c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 13 May 2013 20:45:02 -0700 Subject: [PATCH 0654/1442] ANDROID: netfilter: xt_qtaguid: 3.10 fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop using obsolete procfs api. Signed-off-by: Arve Hjønnevåg --- net/netfilter/xt_qtaguid.c | 1055 ++++++++++++++++++------------------ 1 file changed, 535 insertions(+), 520 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 4ec6d23876c5..435664135785 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,7 @@ #include #include "xt_qtaguid_internal.h" #include "xt_qtaguid_print.h" +#include "../../fs/proc/internal.h" /* * We only use the xt_socket funcs within a similar context to avoid unexpected @@ -122,104 +124,6 @@ static const char *iface_stat_fmt_procfilename = "iface_stat_fmt"; static struct proc_dir_entry *iface_stat_fmt_procfile; -/* - * Ordering of locks: - * outer locks: - * iface_stat_list_lock - * sock_tag_list_lock - * inner locks: - * uid_tag_data_tree_lock - * tag_counter_set_list_lock - * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock - * is acquired. - * - * Call tree with all lock holders as of 2012-04-27: - * - * iface_stat_fmt_proc_read() - * iface_stat_list_lock - * (struct iface_stat) - * - * qtaguid_ctrl_proc_read() - * sock_tag_list_lock - * (sock_tag_tree) - * (struct proc_qtu_data->sock_tag_list) - * prdebug_full_state() - * sock_tag_list_lock - * (sock_tag_tree) - * uid_tag_data_tree_lock - * (uid_tag_data_tree) - * (proc_qtu_data_tree) - * iface_stat_list_lock - * - * qtaguid_stats_proc_read() - * iface_stat_list_lock - * struct iface_stat->tag_stat_list_lock - * - * qtudev_open() - * uid_tag_data_tree_lock - * - * qtudev_release() - * sock_tag_data_list_lock - * uid_tag_data_tree_lock - * prdebug_full_state() - * sock_tag_list_lock - * uid_tag_data_tree_lock - * iface_stat_list_lock - * - * iface_netdev_event_handler() - * iface_stat_create() - * iface_stat_list_lock - * iface_stat_update() - * iface_stat_list_lock - * - * iface_inetaddr_event_handler() - * iface_stat_create() - * iface_stat_list_lock - * iface_stat_update() - * iface_stat_list_lock - * - * iface_inet6addr_event_handler() - * iface_stat_create_ipv6() - * iface_stat_list_lock - * iface_stat_update() - * iface_stat_list_lock - * - * qtaguid_mt() - * account_for_uid() - * if_tag_stat_update() - * get_sock_stat() - * sock_tag_list_lock - * struct iface_stat->tag_stat_list_lock - * tag_stat_update() - * get_active_counter_set() - * tag_counter_set_list_lock - * tag_stat_update() - * get_active_counter_set() - * tag_counter_set_list_lock - * - * - * qtaguid_ctrl_parse() - * ctrl_cmd_delete() - * sock_tag_list_lock - * tag_counter_set_list_lock - * iface_stat_list_lock - * struct iface_stat->tag_stat_list_lock - * uid_tag_data_tree_lock - * ctrl_cmd_counter_set() - * tag_counter_set_list_lock - * ctrl_cmd_tag() - * sock_tag_list_lock - * (sock_tag_tree) - * get_tag_ref() - * uid_tag_data_tree_lock - * (uid_tag_data_tree) - * uid_tag_data_tree_lock - * (proc_qtu_data_tree) - * ctrl_cmd_untag() - * sock_tag_list_lock - * uid_tag_data_tree_lock - * - */ static LIST_HEAD(iface_stat_list); static DEFINE_SPINLOCK(iface_stat_list_lock); @@ -690,42 +594,26 @@ static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) } } -static int read_proc_u64(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int read_proc_u64(struct file *file, char __user *buf, + size_t size, loff_t *ppos) { - int len; - uint64_t value; - char *p = page; - uint64_t *iface_entry = data; + uint64_t *valuep = PDE_DATA(file_inode(file)); + char tmp[24]; + size_t tmp_size; - if (!data) - return 0; - - value = *iface_entry; - p += sprintf(p, "%llu\n", value); - len = (p - page) - off; - *eof = (len <= count) ? 1 : 0; - *start = page + off; - return len; + tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep); + return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size); } -static int read_proc_bool(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int read_proc_bool(struct file *file, char __user *buf, + size_t size, loff_t *ppos) { - int len; - bool value; - char *p = page; - bool *bool_entry = data; - - if (!data) - return 0; + bool *valuep = PDE_DATA(file_inode(file)); + char tmp[24]; + size_t tmp_size; - value = *bool_entry; - p += sprintf(p, "%u\n", value); - len = (p - page) - off; - *eof = (len <= count) ? 1 : 0; - *start = page + off; - return len; + tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep); + return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size); } static int get_active_counter_set(tag_t tag) @@ -771,144 +659,132 @@ static struct iface_stat *get_iface_entry(const char *ifname) } /* This is for fmt2 only */ -static int pp_iface_stat_line(bool header, char *outp, - int char_count, struct iface_stat *iface_entry) -{ - int len; - if (header) { - len = snprintf(outp, char_count, - "ifname " - "total_skb_rx_bytes total_skb_rx_packets " - "total_skb_tx_bytes total_skb_tx_packets " - "rx_tcp_bytes rx_tcp_packets " - "rx_udp_bytes rx_udp_packets " - "rx_other_bytes rx_other_packets " - "tx_tcp_bytes tx_tcp_packets " - "tx_udp_bytes tx_udp_packets " - "tx_other_bytes tx_other_packets\n" - ); - } else { - struct data_counters *cnts; - int cnt_set = 0; /* We only use one set for the device */ - cnts = &iface_entry->totals_via_skb; - len = snprintf( - outp, char_count, - "%s " - "%llu %llu %llu %llu %llu %llu %llu %llu " - "%llu %llu %llu %llu %llu %llu %llu %llu\n", - iface_entry->ifname, - dc_sum_bytes(cnts, cnt_set, IFS_RX), - dc_sum_packets(cnts, cnt_set, IFS_RX), - dc_sum_bytes(cnts, cnt_set, IFS_TX), - dc_sum_packets(cnts, cnt_set, IFS_TX), - cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, - cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, - cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, - cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, - cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, - cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, - cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, - cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, - cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, - cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, - cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, - cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); - } - return len; -} - -static int iface_stat_fmt_proc_read(char *page, char **num_items_returned, - off_t items_to_skip, int char_count, - int *eof, void *data) -{ - char *outp = page; - int item_index = 0; - int len; - int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */ - struct iface_stat *iface_entry; - struct rtnl_link_stats64 dev_stats, *stats; - struct rtnl_link_stats64 no_dev_stats = {0}; - - if (unlikely(module_passive)) { - *eof = 1; - return 0; - } - - CT_DEBUG("qtaguid:proc iface_stat_fmt " - "pid=%u tgid=%u uid=%u " - "page=%p *num_items_returned=%p off=%ld " - "char_count=%d *eof=%d\n", - current->pid, current->tgid, current_fsuid(), - page, *num_items_returned, - items_to_skip, char_count, *eof); +static void pp_iface_stat_header(struct seq_file *m) +{ + seq_puts(m, + "ifname " + "total_skb_rx_bytes total_skb_rx_packets " + "total_skb_tx_bytes total_skb_tx_packets " + "rx_tcp_bytes rx_tcp_packets " + "rx_udp_bytes rx_udp_packets " + "rx_other_bytes rx_other_packets " + "tx_tcp_bytes tx_tcp_packets " + "tx_udp_bytes tx_udp_packets " + "tx_other_bytes tx_other_packets\n" + ); +} - if (*eof) - return 0; +static void pp_iface_stat_line(struct seq_file *m, + struct iface_stat *iface_entry) +{ + struct data_counters *cnts; + int cnt_set = 0; /* We only use one set for the device */ + cnts = &iface_entry->totals_via_skb; + seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + iface_entry->ifname, + dc_sum_bytes(cnts, cnt_set, IFS_RX), + dc_sum_packets(cnts, cnt_set, IFS_RX), + dc_sum_bytes(cnts, cnt_set, IFS_TX), + dc_sum_packets(cnts, cnt_set, IFS_TX), + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); +} + +struct proc_iface_stat_fmt_info { + int fmt; +}; - if (fmt == 2 && item_index++ >= items_to_skip) { - len = pp_iface_stat_line(true, outp, char_count, NULL); - if (len >= char_count) { - *outp = '\0'; - return outp - page; - } - outp += len; - char_count -= len; - (*num_items_returned)++; - } +static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos) +{ + struct proc_iface_stat_fmt_info *p = m->private; + loff_t n = *pos; /* * This lock will prevent iface_stat_update() from changing active, * and in turn prevent an interface from unregistering itself. */ spin_lock_bh(&iface_stat_list_lock); - list_for_each_entry(iface_entry, &iface_stat_list, list) { - if (item_index++ < items_to_skip) - continue; - if (iface_entry->active) { - stats = dev_get_stats(iface_entry->net_dev, - &dev_stats); - } else { - stats = &no_dev_stats; - } - /* - * If the meaning of the data changes, then update the fmtX - * string. - */ - if (fmt == 1) { - len = snprintf( - outp, char_count, - "%s %d " - "%llu %llu %llu %llu " - "%llu %llu %llu %llu\n", - iface_entry->ifname, - iface_entry->active, - iface_entry->totals_via_dev[IFS_RX].bytes, - iface_entry->totals_via_dev[IFS_RX].packets, - iface_entry->totals_via_dev[IFS_TX].bytes, - iface_entry->totals_via_dev[IFS_TX].packets, - stats->rx_bytes, stats->rx_packets, - stats->tx_bytes, stats->tx_packets - ); - } else { - len = pp_iface_stat_line(false, outp, char_count, - iface_entry); - } - if (len >= char_count) { - spin_unlock_bh(&iface_stat_list_lock); - *outp = '\0'; - return outp - page; - } - outp += len; - char_count -= len; - (*num_items_returned)++; - } + if (unlikely(module_passive)) + return NULL; + + if (!n && p->fmt == 2) + pp_iface_stat_header(m); + + return seq_list_start(&iface_stat_list, n); +} + +static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &iface_stat_list, pos); +} + +static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p) +{ spin_unlock_bh(&iface_stat_list_lock); +} - *eof = 1; - return outp - page; +static int iface_stat_fmt_proc_show(struct seq_file *m, void *v) +{ + struct proc_iface_stat_fmt_info *p = m->private; + struct iface_stat *iface_entry; + struct rtnl_link_stats64 dev_stats, *stats; + struct rtnl_link_stats64 no_dev_stats = {0}; + + + CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n", + current->pid, current->tgid, current_fsuid()); + + iface_entry = list_entry(v, struct iface_stat, list); + + if (iface_entry->active) { + stats = dev_get_stats(iface_entry->net_dev, + &dev_stats); + } else { + stats = &no_dev_stats; + } + /* + * If the meaning of the data changes, then update the fmtX + * string. + */ + if (p->fmt == 1) { + seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n", + iface_entry->ifname, + iface_entry->active, + iface_entry->totals_via_dev[IFS_RX].bytes, + iface_entry->totals_via_dev[IFS_RX].packets, + iface_entry->totals_via_dev[IFS_TX].bytes, + iface_entry->totals_via_dev[IFS_TX].packets, + stats->rx_bytes, stats->rx_packets, + stats->tx_bytes, stats->tx_packets + ); + } else { + pp_iface_stat_line(m, iface_entry); + } + return 0; } +static const struct file_operations read_u64_fops = { + .read = read_proc_u64, + .llseek = default_llseek, +}; + +static const struct file_operations read_bool_fops = { + .read = read_proc_bool, + .llseek = default_llseek, +}; + static void iface_create_proc_worker(struct work_struct *work) { struct proc_dir_entry *proc_entry; @@ -926,20 +802,20 @@ static void iface_create_proc_worker(struct work_struct *work) new_iface->proc_ptr = proc_entry; - create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, - read_proc_u64, - &new_iface->totals_via_dev[IFS_TX].bytes); - create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, - read_proc_u64, - &new_iface->totals_via_dev[IFS_RX].bytes); - create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, - read_proc_u64, - &new_iface->totals_via_dev[IFS_TX].packets); - create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, - read_proc_u64, - &new_iface->totals_via_dev[IFS_RX].packets); - create_proc_read_entry("active", proc_iface_perms, proc_entry, - read_proc_bool, &new_iface->active); + proc_create_data("tx_bytes", proc_iface_perms, proc_entry, + &read_u64_fops, + &new_iface->totals_via_dev[IFS_TX].bytes); + proc_create_data("rx_bytes", proc_iface_perms, proc_entry, + &read_u64_fops, + &new_iface->totals_via_dev[IFS_RX].bytes); + proc_create_data("tx_packets", proc_iface_perms, proc_entry, + &read_u64_fops, + &new_iface->totals_via_dev[IFS_TX].packets); + proc_create_data("rx_packets", proc_iface_perms, proc_entry, + &read_u64_fops, + &new_iface->totals_via_dev[IFS_RX].packets); + proc_create_data("active", proc_iface_perms, proc_entry, + &read_bool_fops, &new_iface->active); IF_DEBUG("qtaguid: iface_stat: create_proc(): done " "entry=%p dev=%s\n", new_iface, new_iface->ifname); @@ -1596,6 +1472,33 @@ static struct notifier_block iface_inet6addr_notifier_blk = { .notifier_call = iface_inet6addr_event_handler, }; +static const struct seq_operations iface_stat_fmt_proc_seq_ops = { + .start = iface_stat_fmt_proc_start, + .next = iface_stat_fmt_proc_next, + .stop = iface_stat_fmt_proc_stop, + .show = iface_stat_fmt_proc_show, +}; + +static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file) +{ + struct proc_iface_stat_fmt_info *s; + + s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops, + sizeof(struct proc_iface_stat_fmt_info)); + if (!s) + return -ENOMEM; + + s->fmt = (int)PDE_DATA(inode); + return 0; +} + +static const struct file_operations proc_iface_stat_fmt_fops = { + .open = proc_iface_stat_fmt_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) { int err; @@ -1607,29 +1510,29 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) goto err; } - iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, - proc_iface_perms, - parent_procdir); + iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename, + proc_iface_perms, + parent_procdir, + &proc_iface_stat_fmt_fops, + (void *)1 /* fmt1 */); if (!iface_stat_all_procfile) { pr_err("qtaguid: iface_stat: init " " failed to create stat_old proc entry\n"); err = -1; goto err_zap_entry; } - iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read; - iface_stat_all_procfile->data = (void *)1; /* fmt1 */ - iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename, - proc_iface_perms, - parent_procdir); + iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename, + proc_iface_perms, + parent_procdir, + &proc_iface_stat_fmt_fops, + (void *)2 /* fmt2 */); if (!iface_stat_fmt_procfile) { pr_err("qtaguid: iface_stat: init " " failed to create stat_all proc entry\n"); err = -1; goto err_zap_all_stats_entry; } - iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read; - iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */ err = register_netdevice_notifier(&iface_netdev_notifier_blk); @@ -1934,43 +1837,85 @@ static void prdebug_full_state(int indent_level, const char *fmt, ...) static void prdebug_full_state(int indent_level, const char *fmt, ...) {} #endif +struct proc_ctrl_print_info { + struct sock *sk; /* socket found by reading to sk_pos */ + loff_t sk_pos; +}; + +static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct proc_ctrl_print_info *pcpi = m->private; + struct sock_tag *sock_tag_entry = v; + struct rb_node *node; + + (*pos)++; + + if (!v || v == SEQ_START_TOKEN) + return NULL; + + node = rb_next(&sock_tag_entry->sock_node); + if (!node) { + pcpi->sk = NULL; + sock_tag_entry = SEQ_START_TOKEN; + } else { + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + pcpi->sk = sock_tag_entry->sk; + } + pcpi->sk_pos = *pos; + return sock_tag_entry; +} + +static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos) +{ + struct proc_ctrl_print_info *pcpi = m->private; + struct sock_tag *sock_tag_entry; + struct rb_node *node; + + spin_lock_bh(&sock_tag_list_lock); + + if (unlikely(module_passive)) + return NULL; + + if (*pos == 0) { + pcpi->sk_pos = 0; + node = rb_first(&sock_tag_tree); + if (!node) { + pcpi->sk = NULL; + return SEQ_START_TOKEN; + } + sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + pcpi->sk = sock_tag_entry->sk; + } else { + sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) : + NULL) ?: SEQ_START_TOKEN; + if (*pos != pcpi->sk_pos) { + /* seq_read skipped a next call */ + *pos = pcpi->sk_pos; + return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos); + } + } + return sock_tag_entry; +} + +static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v) +{ + spin_unlock_bh(&sock_tag_list_lock); +} + /* * Procfs reader to get all active socket tags using style "1)" as described in * fs/proc/generic.c */ -static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, - off_t items_to_skip, int char_count, int *eof, - void *data) +static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) { - char *outp = page; - int len; + struct sock_tag *sock_tag_entry = v; uid_t uid; - struct rb_node *node; - struct sock_tag *sock_tag_entry; - int item_index = 0; - int indent_level = 0; long f_count; - if (unlikely(module_passive)) { - *eof = 1; - return 0; - } - - if (*eof) - return 0; - - CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u " - "page=%p off=%ld char_count=%d *eof=%d\n", - current->pid, current->tgid, current_fsuid(), - page, items_to_skip, char_count, *eof); + CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n", + current->pid, current->tgid, current_fsuid()); - spin_lock_bh(&sock_tag_list_lock); - for (node = rb_first(&sock_tag_tree); - node; - node = rb_next(node)) { - if (item_index++ < items_to_skip) - continue; - sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); + if (sock_tag_entry != SEQ_START_TOKEN) { uid = get_uid_from_tag(sock_tag_entry->tag); CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " "pid=%u\n", @@ -1981,66 +1926,42 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, ); f_count = atomic_long_read( &sock_tag_entry->socket->file->f_count); - len = snprintf(outp, char_count, - "sock=%p tag=0x%llx (uid=%u) pid=%u " - "f_count=%lu\n", - sock_tag_entry->sk, - sock_tag_entry->tag, uid, - sock_tag_entry->pid, f_count); - if (len >= char_count) { - spin_unlock_bh(&sock_tag_list_lock); - *outp = '\0'; - return outp - page; - } - outp += len; - char_count -= len; - (*num_items_returned)++; - } - spin_unlock_bh(&sock_tag_list_lock); - - if (item_index++ >= items_to_skip) { - len = snprintf(outp, char_count, - "events: sockets_tagged=%llu " - "sockets_untagged=%llu " - "counter_set_changes=%llu " - "delete_cmds=%llu " - "iface_events=%llu " - "match_calls=%llu " - "match_calls_prepost=%llu " - "match_found_sk=%llu " - "match_found_sk_in_ct=%llu " - "match_found_no_sk_in_ct=%llu " - "match_no_sk=%llu " - "match_no_sk_file=%llu\n", - atomic64_read(&qtu_events.sockets_tagged), - atomic64_read(&qtu_events.sockets_untagged), - atomic64_read(&qtu_events.counter_set_changes), - atomic64_read(&qtu_events.delete_cmds), - atomic64_read(&qtu_events.iface_events), - atomic64_read(&qtu_events.match_calls), - atomic64_read(&qtu_events.match_calls_prepost), - atomic64_read(&qtu_events.match_found_sk), - atomic64_read(&qtu_events.match_found_sk_in_ct), - atomic64_read( - &qtu_events.match_found_no_sk_in_ct), - atomic64_read(&qtu_events.match_no_sk), - atomic64_read(&qtu_events.match_no_sk_file)); - if (len >= char_count) { - *outp = '\0'; - return outp - page; - } - outp += len; - char_count -= len; - (*num_items_returned)++; - } - - /* Count the following as part of the last item_index */ - if (item_index > items_to_skip) { - prdebug_full_state(indent_level, "proc ctrl"); + seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u " + "f_count=%lu\n", + sock_tag_entry->sk, + sock_tag_entry->tag, uid, + sock_tag_entry->pid, f_count); + } else { + seq_printf(m, "events: sockets_tagged=%llu " + "sockets_untagged=%llu " + "counter_set_changes=%llu " + "delete_cmds=%llu " + "iface_events=%llu " + "match_calls=%llu " + "match_calls_prepost=%llu " + "match_found_sk=%llu " + "match_found_sk_in_ct=%llu " + "match_found_no_sk_in_ct=%llu " + "match_no_sk=%llu " + "match_no_sk_file=%llu\n", + atomic64_read(&qtu_events.sockets_tagged), + atomic64_read(&qtu_events.sockets_untagged), + atomic64_read(&qtu_events.counter_set_changes), + atomic64_read(&qtu_events.delete_cmds), + atomic64_read(&qtu_events.iface_events), + atomic64_read(&qtu_events.match_calls), + atomic64_read(&qtu_events.match_calls_prepost), + atomic64_read(&qtu_events.match_found_sk), + atomic64_read(&qtu_events.match_found_sk_in_ct), + atomic64_read(&qtu_events.match_found_no_sk_in_ct), + atomic64_read(&qtu_events.match_no_sk), + atomic64_read(&qtu_events.match_no_sk_file)); + + /* Count the following as part of the last item_index */ + prdebug_full_state(0, "proc ctrl"); } - *eof = 1; - return outp - page; + return 0; } /* @@ -2553,7 +2474,7 @@ static int qtaguid_ctrl_parse(const char *input, int count) #define MAX_QTAGUID_CTRL_INPUT_LEN 255 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + size_t count, loff_t *offp) { char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; @@ -2571,178 +2492,230 @@ static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, } struct proc_print_info { - char *outp; - char **num_items_returned; struct iface_stat *iface_entry; - struct tag_stat *ts_entry; int item_index; - int items_to_skip; - int char_count; + tag_t tag; /* tag found by reading to tag_pos */ + off_t tag_pos; + int tag_item_index; }; -static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) +static void pp_stats_header(struct seq_file *m) { - int len; - struct data_counters *cnts; + seq_puts(m, + "idx iface acct_tag_hex uid_tag_int cnt_set " + "rx_bytes rx_packets " + "tx_bytes tx_packets " + "rx_tcp_bytes rx_tcp_packets " + "rx_udp_bytes rx_udp_packets " + "rx_other_bytes rx_other_packets " + "tx_tcp_bytes tx_tcp_packets " + "tx_udp_bytes tx_udp_packets " + "tx_other_bytes tx_other_packets\n"); +} - if (!ppi->item_index) { - if (ppi->item_index++ < ppi->items_to_skip) - return 0; - len = snprintf(ppi->outp, ppi->char_count, - "idx iface acct_tag_hex uid_tag_int cnt_set " - "rx_bytes rx_packets " - "tx_bytes tx_packets " - "rx_tcp_bytes rx_tcp_packets " - "rx_udp_bytes rx_udp_packets " - "rx_other_bytes rx_other_packets " - "tx_tcp_bytes tx_tcp_packets " - "tx_udp_bytes tx_udp_packets " - "tx_other_bytes tx_other_packets\n"); - } else { - tag_t tag = ppi->ts_entry->tn.tag; - uid_t stat_uid = get_uid_from_tag(tag); - /* Detailed tags are not available to everybody */ - if (get_atag_from_tag(tag) - && !can_read_other_uid_stats(stat_uid)) { - CT_DEBUG("qtaguid: stats line: " - "%s 0x%llx %u: insufficient priv " - "from pid=%u tgid=%u uid=%u stats.gid=%u\n", - ppi->iface_entry->ifname, - get_atag_from_tag(tag), stat_uid, - current->pid, current->tgid, current_fsuid(), - xt_qtaguid_stats_file->gid); - return 0; - } - if (ppi->item_index++ < ppi->items_to_skip) - return 0; - cnts = &ppi->ts_entry->counters; - len = snprintf( - ppi->outp, ppi->char_count, - "%d %s 0x%llx %u %u " - "%llu %llu " - "%llu %llu " - "%llu %llu " - "%llu %llu " - "%llu %llu " - "%llu %llu " - "%llu %llu " - "%llu %llu\n", - ppi->item_index, - ppi->iface_entry->ifname, - get_atag_from_tag(tag), - stat_uid, - cnt_set, - dc_sum_bytes(cnts, cnt_set, IFS_RX), - dc_sum_packets(cnts, cnt_set, IFS_RX), - dc_sum_bytes(cnts, cnt_set, IFS_TX), - dc_sum_packets(cnts, cnt_set, IFS_TX), - cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, - cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, - cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, - cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, - cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, - cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, - cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, - cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, - cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, - cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, - cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, - cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); - } - return len; -} - -static bool pp_sets(struct proc_print_info *ppi) -{ - int len; +static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry, + int cnt_set) +{ + int ret; + struct data_counters *cnts; + tag_t tag = ts_entry->tn.tag; + uid_t stat_uid = get_uid_from_tag(tag); + struct proc_print_info *ppi = m->private; + /* Detailed tags are not available to everybody */ + if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) { + CT_DEBUG("qtaguid: stats line: " + "%s 0x%llx %u: insufficient priv " + "from pid=%u tgid=%u uid=%u stats.gid=%u\n", + ppi->iface_entry->ifname, + get_atag_from_tag(tag), stat_uid, + current->pid, current->tgid, current_fsuid(), + xt_qtaguid_stats_file->gid); + return 0; + } + ppi->item_index++; + cnts = &ts_entry->counters; + ret = seq_printf(m, "%d %s 0x%llx %u %u " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu " + "%llu %llu\n", + ppi->item_index, + ppi->iface_entry->ifname, + get_atag_from_tag(tag), + stat_uid, + cnt_set, + dc_sum_bytes(cnts, cnt_set, IFS_RX), + dc_sum_packets(cnts, cnt_set, IFS_RX), + dc_sum_bytes(cnts, cnt_set, IFS_TX), + dc_sum_packets(cnts, cnt_set, IFS_TX), + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, + cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); + return ret ?: 1; +} + +static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry) +{ + int ret; int counter_set; for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; counter_set++) { - len = pp_stats_line(ppi, counter_set); - if (len >= ppi->char_count) { - *ppi->outp = '\0'; + ret = pp_stats_line(m, ts_entry, counter_set); + if (ret < 0) return false; - } - if (len) { - ppi->outp += len; - ppi->char_count -= len; - (*ppi->num_items_returned)++; - } } return true; } -/* - * Procfs reader to get all tag stats using style "1)" as described in - * fs/proc/generic.c - * Groups all protocols tx/rx bytes. - */ -static int qtaguid_stats_proc_read(char *page, char **num_items_returned, - off_t items_to_skip, int char_count, int *eof, - void *data) -{ - struct proc_print_info ppi; - int len; - - ppi.outp = page; - ppi.item_index = 0; - ppi.char_count = char_count; - ppi.num_items_returned = num_items_returned; - ppi.items_to_skip = items_to_skip; - - if (unlikely(module_passive)) { - len = pp_stats_line(&ppi, 0); - /* The header should always be shorter than the buffer. */ - BUG_ON(len >= ppi.char_count); - (*num_items_returned)++; - *eof = 1; - return len; - } - - CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u " - "page=%p *num_items_returned=%p off=%ld " - "char_count=%d *eof=%d\n", - current->pid, current->tgid, current_fsuid(), - page, *num_items_returned, - items_to_skip, char_count, *eof); - - if (*eof) - return 0; +static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr) +{ + struct iface_stat *iface_entry; + + if (!ptr) + return false; + + list_for_each_entry(iface_entry, &iface_stat_list, list) + if (iface_entry == ptr) + return true; + return false; +} + +static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi) +{ + spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock); + list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) { + spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock); + return; + } + ppi->iface_entry = NULL; +} - /* The idx is there to help debug when things go belly up. */ - len = pp_stats_line(&ppi, 0); - /* Don't advance the outp unless the whole line was printed */ - if (len >= ppi.char_count) { - *ppi.outp = '\0'; - return ppi.outp - page; +static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct proc_print_info *ppi = m->private; + struct tag_stat *ts_entry; + struct rb_node *node; + + if (!v) { + pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__); + return NULL; } - if (len) { - ppi.outp += len; - ppi.char_count -= len; - (*num_items_returned)++; + + (*pos)++; + + if (!ppi->iface_entry || unlikely(module_passive)) + return NULL; + + if (v == SEQ_START_TOKEN) + node = rb_first(&ppi->iface_entry->tag_stat_tree); + else + node = rb_next(&((struct tag_stat *)v)->tn.node); + + while (!node) { + qtaguid_stats_proc_next_iface_entry(ppi); + if (!ppi->iface_entry) + return NULL; + node = rb_first(&ppi->iface_entry->tag_stat_tree); } + ts_entry = rb_entry(node, struct tag_stat, tn.node); + ppi->tag = ts_entry->tn.tag; + ppi->tag_pos = *pos; + ppi->tag_item_index = ppi->item_index; + return ts_entry; +} + +static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos) +{ + struct proc_print_info *ppi = m->private; + struct tag_stat *ts_entry = NULL; + spin_lock_bh(&iface_stat_list_lock); - list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { - struct rb_node *node; - spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); - for (node = rb_first(&ppi.iface_entry->tag_stat_tree); - node; - node = rb_next(node)) { - ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); - if (!pp_sets(&ppi)) { - spin_unlock_bh( - &ppi.iface_entry->tag_stat_list_lock); - spin_unlock_bh(&iface_stat_list_lock); - return ppi.outp - page; - } + + if (*pos == 0) { + ppi->item_index = 1; + ppi->tag_pos = 0; + if (list_empty(&iface_stat_list)) { + ppi->iface_entry = NULL; + } else { + ppi->iface_entry = list_first_entry(&iface_stat_list, + struct iface_stat, + list); + spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock); + } + return SEQ_START_TOKEN; + } + if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) { + if (ppi->iface_entry) { + pr_err("qtaguid: %s(): iface_entry %p not found\n", + __func__, ppi->iface_entry); + ppi->iface_entry = NULL; + } + return NULL; + } + + spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock); + + if (!ppi->tag_pos) { + /* seq_read skipped first next call */ + ts_entry = SEQ_START_TOKEN; + } else { + ts_entry = tag_stat_tree_search( + &ppi->iface_entry->tag_stat_tree, ppi->tag); + if (!ts_entry) { + pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n", + __func__, ppi->tag); + return NULL; } - spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); } + + if (*pos == ppi->tag_pos) { /* normal resume */ + ppi->item_index = ppi->tag_item_index; + } else { + /* seq_read skipped a next call */ + *pos = ppi->tag_pos; + ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos); + } + + return ts_entry; +} + +static void qtaguid_stats_proc_stop(struct seq_file *m, void *v) +{ + struct proc_print_info *ppi = m->private; + if (ppi->iface_entry) + spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock); spin_unlock_bh(&iface_stat_list_lock); +} - *eof = 1; - return ppi.outp - page; +/* + * Procfs reader to get all tag stats using style "1)" as described in + * fs/proc/generic.c + * Groups all protocols tx/rx bytes. + */ +static int qtaguid_stats_proc_show(struct seq_file *m, void *v) +{ + struct tag_stat *ts_entry = v; + + if (v == SEQ_START_TOKEN) + pp_stats_header(m); + else + pp_sets(m, ts_entry); + + return 0; } /*------------------------------------------*/ @@ -2907,6 +2880,47 @@ static struct miscdevice qtu_device = { /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ }; +static const struct seq_operations proc_qtaguid_ctrl_seqops = { + .start = qtaguid_ctrl_proc_start, + .next = qtaguid_ctrl_proc_next, + .stop = qtaguid_ctrl_proc_stop, + .show = qtaguid_ctrl_proc_show, +}; + +static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file) +{ + return seq_open_private(file, &proc_qtaguid_ctrl_seqops, + sizeof(struct proc_ctrl_print_info)); +} + +static const struct file_operations proc_qtaguid_ctrl_fops = { + .open = proc_qtaguid_ctrl_open, + .read = seq_read, + .write = qtaguid_ctrl_proc_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct seq_operations proc_qtaguid_stats_seqops = { + .start = qtaguid_stats_proc_start, + .next = qtaguid_stats_proc_next, + .stop = qtaguid_stats_proc_stop, + .show = qtaguid_stats_proc_show, +}; + +static int proc_qtaguid_stats_open(struct inode *inode, struct file *file) +{ + return seq_open_private(file, &proc_qtaguid_stats_seqops, + sizeof(struct proc_print_info)); +} + +static const struct file_operations proc_qtaguid_stats_fops = { + .open = proc_qtaguid_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + /*------------------------------------------*/ static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) { @@ -2918,26 +2932,27 @@ static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) goto no_dir; } - xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, - *res_procdir); + xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms, + *res_procdir, + &proc_qtaguid_ctrl_fops, + NULL); if (!xt_qtaguid_ctrl_file) { pr_err("qtaguid: failed to create xt_qtaguid/ctrl " " file\n"); ret = -ENOMEM; goto no_ctrl_entry; } - xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; - xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; - xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, - *res_procdir); + xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms, + *res_procdir, + &proc_qtaguid_stats_fops, + NULL); if (!xt_qtaguid_stats_file) { pr_err("qtaguid: failed to create xt_qtaguid/stats " "file\n"); ret = -ENOMEM; goto no_stats_entry; } - xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; /* * TODO: add support counter hacking * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; -- GitLab From d1fd39723467985771cbaeee325d1b025b88a5dc Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 20 Feb 2013 16:38:34 -0800 Subject: [PATCH 0655/1442] ANDROID: netfilter: xt_qtaguid: fix bad tcp_time_wait sock handling Since (41063e9 ipv4: Early TCP socket demux), skb's can have an sk which is not a struct sock but the smaller struct inet_timewait_sock without an sk->sk_socket. Now we bypass sk_state == TCP_TIME_WAIT Signed-off-by: JP Abgrall --- net/netfilter/xt_qtaguid.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 435664135785..e476b88f9d68 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1597,14 +1597,13 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, return NULL; } - /* - * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs. - * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 - * Not fixed in 3.0-r3 :( - */ if (sk) { MT_DEBUG("qtaguid: %p->sk_proto=%u " "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); + /* + * When in TCP_TIME_WAIT the sk is not a "struct sock" but + * "struct inet_timewait_sock" which is missing fields. + */ if (sk->sk_state == TCP_TIME_WAIT) { xt_socket_put_sk(sk); sk = NULL; @@ -1688,6 +1687,13 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) } sk = skb->sk; + /* + * When in TCP_TIME_WAIT the sk is not a "struct sock" but + * "struct inet_timewait_sock" which is missing fields. + * So we ignore it. + */ + if (sk && sk->sk_state == TCP_TIME_WAIT) + sk = NULL; if (sk == NULL) { /* * A missing sk->sk_socket happens when packets are in-flight -- GitLab From 564729173b122169e4b8cc6cdce754739b0a4264 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 4 Dec 2013 17:39:27 -0800 Subject: [PATCH 0656/1442] ANDROID: netfilter: xt_qtaguid: fix memory leak in seq_file handlers Change-Id: I15b21230d52479d008a00d9e2191dda020f00925 Signed-off-by: Greg Hackmann --- net/netfilter/xt_qtaguid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index e476b88f9d68..4a16829969a6 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1496,7 +1496,7 @@ static const struct file_operations proc_iface_stat_fmt_fops = { .open = proc_iface_stat_fmt_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) @@ -2904,7 +2904,7 @@ static const struct file_operations proc_qtaguid_ctrl_fops = { .read = seq_read, .write = qtaguid_ctrl_proc_write, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; static const struct seq_operations proc_qtaguid_stats_seqops = { -- GitLab From 85a2eb5b48fc59187284a0017b5643620c59b7e9 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 24 Feb 2014 09:39:46 -0800 Subject: [PATCH 0657/1442] ANDROID: netfilter: xt_qtaguid: 64-bit warning fixes Change-Id: I2adc517c0c51050ed601992fa0ea4de8f1449414 Signed-off-by: Greg Hackmann --- net/netfilter/xt_qtaguid.c | 14 +++++++------- net/netfilter/xt_quota2.c | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 4a16829969a6..eee667987f7d 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -594,7 +594,7 @@ static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) } } -static int read_proc_u64(struct file *file, char __user *buf, +static ssize_t read_proc_u64(struct file *file, char __user *buf, size_t size, loff_t *ppos) { uint64_t *valuep = PDE_DATA(file_inode(file)); @@ -605,7 +605,7 @@ static int read_proc_u64(struct file *file, char __user *buf, return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size); } -static int read_proc_bool(struct file *file, char __user *buf, +static ssize_t read_proc_bool(struct file *file, char __user *buf, size_t size, loff_t *ppos) { bool *valuep = PDE_DATA(file_inode(file)); @@ -1488,7 +1488,7 @@ static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file) if (!s) return -ENOMEM; - s->fmt = (int)PDE_DATA(inode); + s->fmt = (uintptr_t)PDE_DATA(inode); return 0; } @@ -2440,10 +2440,10 @@ static int ctrl_cmd_untag(const char *input) return res; } -static int qtaguid_ctrl_parse(const char *input, int count) +static ssize_t qtaguid_ctrl_parse(const char *input, size_t count) { char cmd; - int res; + ssize_t res; CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n", input, current->pid, current->tgid, current_fsuid()); @@ -2474,12 +2474,12 @@ static int qtaguid_ctrl_parse(const char *input, int count) if (!res) res = count; err: - CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); + CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res); return res; } #define MAX_QTAGUID_CTRL_INPUT_LEN 255 -static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, +static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *offp) { char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c index 44ebdcc75965..4328562572f6 100644 --- a/net/netfilter/xt_quota2.c +++ b/net/netfilter/xt_quota2.c @@ -122,7 +122,7 @@ static void quota2_log(unsigned int hooknum, } #endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */ -static int quota_proc_read(struct file *file, char __user *buf, +static ssize_t quota_proc_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct xt_quota_counter *e = PDE_DATA(file_inode(file)); @@ -135,7 +135,7 @@ static int quota_proc_read(struct file *file, char __user *buf, return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size); } -static int quota_proc_write(struct file *file, const char __user *input, +static ssize_t quota_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ppos) { struct xt_quota_counter *e = PDE_DATA(file_inode(file)); -- GitLab From 58ffa86630c24c21c0cb033c07e333bace2ae59c Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Thu, 29 Sep 2011 15:36:49 -0700 Subject: [PATCH 0658/1442] ANDROID: netfilter: ipv6: fix crash caused by ipv6_find_hdr() When calling: ipv6_find_hdr(skb, &thoff, -1, NULL) on a fragmented packet, thoff would be left with a random value causing callers to read random memory offsets with: skb_header_pointer(skb, thoff, ...) Now we force ipv6_find_hdr() to return a failure in this case. Calling: ipv6_find_hdr(skb, &thoff, -1, &fragoff) will set fragoff as expected, and not return a failure. Change-Id: Ib474e8a4267dd2b300feca325811330329684a88 Signed-off-by: JP Abgrall --- net/ipv6/exthdrs_core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 305e2ed730bf..477692f80f0d 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -166,15 +166,15 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv); * to explore inner IPv6 header, eg. ICMPv6 error messages. * * If target header is found, its offset is set in *offset and return protocol - * number. Otherwise, return -1. + * number. Otherwise, return -ENOENT or -EBADMSG. * * If the first fragment doesn't contain the final protocol header or * NEXTHDR_NONE it is considered invalid. * * Note that non-1st fragment is special case that "the protocol number * of last header" is "next header" field in Fragment header. In this case, - * *offset is meaningless and fragment offset is stored in *fragoff if fragoff - * isn't NULL. + * *offset is meaningless. If fragoff is not NULL, the fragment offset is + * stored in *fragoff; if it is NULL, return -EINVAL. * * if flags is not NULL and it's a fragment, then the frag flag * IP6_FH_F_FRAG will be set. If it's an AH header, the @@ -253,9 +253,12 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, if (target < 0 && ((!ipv6_ext_hdr(hp->nexthdr)) || hp->nexthdr == NEXTHDR_NONE)) { - if (fragoff) + if (fragoff) { *fragoff = _frag_off; - return hp->nexthdr; + return hp->nexthdr; + } else { + return -EINVAL; + } } if (!found) return -ENOENT; -- GitLab From 89f9044e826c6dc9bb7bdae86dde9cec00f4c939 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 28 Mar 2014 12:19:27 -0700 Subject: [PATCH 0659/1442] ANDROID: net: kuid/kguid build fixes Small build fixes for xt_quota2 Change-Id: Ib098768040c8875887b2081c3165a6c83b37e180 Signed-off-by: John Stultz --- net/netfilter/xt_quota2.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c index 4328562572f6..99592ae56d9b 100644 --- a/net/netfilter/xt_quota2.c +++ b/net/netfilter/xt_quota2.c @@ -52,12 +52,9 @@ static DEFINE_SPINLOCK(counter_list_lock); static struct proc_dir_entry *proc_xt_quota; static unsigned int quota_list_perms = S_IRUGO | S_IWUSR; -static unsigned int quota_list_uid = 0; -static unsigned int quota_list_gid = 0; +static kuid_t quota_list_uid = KUIDT_INIT(0); +static kgid_t quota_list_gid = KGIDT_INIT(0); module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR); -module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR); -module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR); - #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG static void quota2_log(unsigned int hooknum, -- GitLab From a347e8eff8a9abe776436b0261fd5bf33f2fbde8 Mon Sep 17 00:00:00 2001 From: "Jon Medhurst (Tixy)" Date: Mon, 14 Apr 2014 21:20:49 -0700 Subject: [PATCH 0660/1442] ANDROID: xt_qtaguid: Fix boot panic We need the change below because of mainline commit 351638e7de (net: pass info struct via netdevice notifier). Otherwise we panic. Change-Id: I7daf7513a733933fdcbaeebea7f8191f8b6a0432 Signed-off-by: John Stultz --- net/netfilter/xt_qtaguid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index eee667987f7d..4f574a6fc1fb 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1375,7 +1375,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, static int iface_netdev_event_handler(struct notifier_block *nb, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (unlikely(module_passive)) return NOTIFY_DONE; -- GitLab From eb168435d8c5e2db7387145f8e66e7be4a910945 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Tue, 25 Mar 2014 16:43:28 -0700 Subject: [PATCH 0661/1442] ANDROID: nf: IDLETIMER: time-stamp and suspend/resume handling. Message notifications contains an additional timestamp field in nano seconds. The expiry time for the timers are modified during suspend/resume. If timer was supposed to expire while the system is suspended then a notification is sent when it resumes with the timestamp of the scheduled expiry. Removes the race condition for multiple work scheduled. Bug: 13247811 Change-Id: I752c5b00225fe7085482819f975cc0eb5af89bff Signed-off-by: Ruchi Kandoi --- net/netfilter/xt_IDLETIMER.c | 169 +++++++++++++++++++++++++++++++---- 1 file changed, 152 insertions(+), 17 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 5320ff9179ac..7b8fe620cfed 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -42,6 +42,11 @@ #include #include #include +#include +#include +#include +#include +#include #include struct idletimer_tg_attr { @@ -58,22 +63,65 @@ struct idletimer_tg { struct kobject *kobj; struct idletimer_tg_attr attr; + struct timespec delayed_timer_trigger; + struct timespec last_modified_timer; + struct timespec last_suspend_time; + struct notifier_block pm_nb; + + int timeout; unsigned int refcnt; + bool work_pending; bool send_nl_msg; bool active; }; static LIST_HEAD(idletimer_tg_list); static DEFINE_MUTEX(list_mutex); +static DEFINE_SPINLOCK(timestamp_lock); static struct kobject *idletimer_tg_kobj; +static bool check_for_delayed_trigger(struct idletimer_tg *timer, + struct timespec *ts) +{ + bool state; + struct timespec temp; + spin_lock_bh(×tamp_lock); + timer->work_pending = false; + if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout || + timer->delayed_timer_trigger.tv_sec != 0) { + state = false; + temp.tv_sec = timer->timeout; + temp.tv_nsec = 0; + if (timer->delayed_timer_trigger.tv_sec != 0) { + temp = timespec_add(timer->delayed_timer_trigger, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + timer->delayed_timer_trigger.tv_sec = 0; + timer->work_pending = true; + schedule_work(&timer->work); + } else { + temp = timespec_add(timer->last_modified_timer, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + } + } else { + state = timer->active; + } + spin_unlock_bh(×tamp_lock); + return state; +} + static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) { char iface_msg[NLMSG_MAX_SIZE]; char state_msg[NLMSG_MAX_SIZE]; - char *envp[] = { iface_msg, state_msg, NULL }; + char timestamp_msg[NLMSG_MAX_SIZE]; + char *envp[] = { iface_msg, state_msg, timestamp_msg, NULL }; int res; + struct timespec ts; + uint64_t time_ns; + bool state; res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s", iface); @@ -81,12 +129,24 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) pr_err("message too long (%d)", res); return; } + + get_monotonic_boottime(&ts); + state = check_for_delayed_trigger(timer, &ts); res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s", - timer->active ? "active" : "inactive"); + state ? "active" : "inactive"); + if (NLMSG_MAX_SIZE <= res) { pr_err("message too long (%d)", res); return; } + + time_ns = timespec_to_ns(&ts); + res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns); + if (NLMSG_MAX_SIZE <= res) { + timestamp_msg[0] = '\0'; + pr_err("message too long (%d)", res); + } + pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); return; @@ -151,9 +211,55 @@ static void idletimer_tg_expired(unsigned long data) struct idletimer_tg *timer = (struct idletimer_tg *) data; pr_debug("timer %s expired\n", timer->attr.attr.name); - + spin_lock_bh(×tamp_lock); timer->active = false; + timer->work_pending = true; schedule_work(&timer->work); + spin_unlock_bh(×tamp_lock); +} + +static int idletimer_resume(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + struct timespec ts; + unsigned long time_diff, now = jiffies; + struct idletimer_tg *timer = container_of(notifier, + struct idletimer_tg, pm_nb); + if (!timer) + return NOTIFY_DONE; + switch (pm_event) { + case PM_SUSPEND_PREPARE: + get_monotonic_boottime(&timer->last_suspend_time); + break; + case PM_POST_SUSPEND: + spin_lock_bh(×tamp_lock); + if (!timer->active) { + spin_unlock_bh(×tamp_lock); + break; + } + /* since jiffies are not updated when suspended now represents + * the time it would have suspended */ + if (time_after(timer->timer.expires, now)) { + get_monotonic_boottime(&ts); + ts = timespec_sub(ts, timer->last_suspend_time); + time_diff = timespec_to_jiffies(&ts); + if (timer->timer.expires > (time_diff + now)) { + mod_timer_pending(&timer->timer, + (timer->timer.expires - time_diff)); + } else { + del_timer(&timer->timer); + timer->timer.expires = 0; + timer->active = false; + timer->work_pending = true; + schedule_work(&timer->work); + } + } + spin_unlock_bh(×tamp_lock); + break; + default: + break; + } + return NOTIFY_DONE; } static int idletimer_tg_create(struct idletimer_tg_info *info) @@ -188,6 +294,18 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) info->timer->refcnt = 1; info->timer->send_nl_msg = (info->send_nl_msg == 0) ? false : true; info->timer->active = true; + info->timer->timeout = info->timeout; + + info->timer->delayed_timer_trigger.tv_sec = 0; + info->timer->delayed_timer_trigger.tv_nsec = 0; + info->timer->work_pending = false; + get_monotonic_boottime(&info->timer->last_modified_timer); + + info->timer->pm_nb.notifier_call = idletimer_resume; + ret = register_pm_notifier(&info->timer->pm_nb); + if (ret) + printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n", + __func__, ret); mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); @@ -204,6 +322,34 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) return ret; } +static void reset_timer(const struct idletimer_tg_info *info) +{ + unsigned long now = jiffies; + struct idletimer_tg *timer = info->timer; + bool timer_prev; + + spin_lock_bh(×tamp_lock); + timer_prev = timer->active; + timer->active = true; + /* timer_prev is used to guard overflow problem in time_before*/ + if (!timer_prev || time_before(timer->timer.expires, now)) { + pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", + timer->timer.expires, now); + /* checks if there is a pending inactive notification*/ + if (timer->work_pending) + timer->delayed_timer_trigger = timer->last_modified_timer; + else { + timer->work_pending = true; + schedule_work(&timer->work); + } + } + + get_monotonic_boottime(&timer->last_modified_timer); + mod_timer(&timer->timer, + msecs_to_jiffies(info->timeout * 1000) + now); + spin_unlock_bh(×tamp_lock); +} + /* * The actual xt_tables plugin. */ @@ -227,9 +373,7 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, } /* TODO: Avoid modifying timers on each packet */ - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + now); - + reset_timer(info); return XT_CONTINUE; } @@ -258,17 +402,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; - info->timer->active = true; - - if (time_before(info->timer->timer.expires, now)) { - schedule_work(&info->timer->work); - pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", - info->timer->timer.expires, now); - } - - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + now); - + reset_timer(info); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { @@ -300,6 +434,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) del_timer_sync(&info->timer->timer); cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + unregister_pm_notifier(&info->timer->pm_nb); kfree(info->timer->attr.attr.name); kfree(info->timer); } else { -- GitLab From 13e257eaa624b1322b62ac21d81a398a38462324 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 24 Apr 2014 14:07:53 -0700 Subject: [PATCH 0662/1442] ANDROID: nf: Remove compilation error caused by e8430cbed3ef15fdb1ac26cfd020e010aa5f1c35 Signed-off-by: Ruchi Kandoi --- net/netfilter/xt_IDLETIMER.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 7b8fe620cfed..2e08868505d1 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -381,7 +381,6 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) { struct idletimer_tg_info *info = par->targinfo; int ret; - unsigned long now = jiffies; pr_debug("checkentry targinfo %s\n", info->label); -- GitLab From da5ea99a74f2f8c4414e1a89b376fb501d246441 Mon Sep 17 00:00:00 2001 From: Sherman Yin Date: Thu, 12 Jun 2014 14:35:38 -0700 Subject: [PATCH 0663/1442] ANDROID: netfilter: fix seq_printf type mismatch warning The return type of atomic64_read() varies depending on arch. The arm64 version is being changed from long long to long in the mainline for v3.16, causing a seq_printf type mismatch (%llu) in guid_ctrl_proc_show(). This commit fixes the type mismatch by casting atomic64_read() to u64. Change-Id: Iae0a6bd4314f5686a9f4fecbe6203e94ec0870de Signed-off-by: Sherman Yin --- net/netfilter/xt_qtaguid.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 4f574a6fc1fb..7c9be1706533 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1950,18 +1950,18 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) "match_found_no_sk_in_ct=%llu " "match_no_sk=%llu " "match_no_sk_file=%llu\n", - atomic64_read(&qtu_events.sockets_tagged), - atomic64_read(&qtu_events.sockets_untagged), - atomic64_read(&qtu_events.counter_set_changes), - atomic64_read(&qtu_events.delete_cmds), - atomic64_read(&qtu_events.iface_events), - atomic64_read(&qtu_events.match_calls), - atomic64_read(&qtu_events.match_calls_prepost), - atomic64_read(&qtu_events.match_found_sk), - atomic64_read(&qtu_events.match_found_sk_in_ct), - atomic64_read(&qtu_events.match_found_no_sk_in_ct), - atomic64_read(&qtu_events.match_no_sk), - atomic64_read(&qtu_events.match_no_sk_file)); + (u64)atomic64_read(&qtu_events.sockets_tagged), + (u64)atomic64_read(&qtu_events.sockets_untagged), + (u64)atomic64_read(&qtu_events.counter_set_changes), + (u64)atomic64_read(&qtu_events.delete_cmds), + (u64)atomic64_read(&qtu_events.iface_events), + (u64)atomic64_read(&qtu_events.match_calls), + (u64)atomic64_read(&qtu_events.match_calls_prepost), + (u64)atomic64_read(&qtu_events.match_found_sk), + (u64)atomic64_read(&qtu_events.match_found_sk_in_ct), + (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct), + (u64)atomic64_read(&qtu_events.match_no_sk), + (u64)atomic64_read(&qtu_events.match_no_sk_file)); /* Count the following as part of the last item_index */ prdebug_full_state(0, "proc ctrl"); -- GitLab From 5ecc80776f0b7e28034335943b4fb24e92baf897 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 23 Apr 2015 12:09:09 -0700 Subject: [PATCH 0664/1442] ANDROID: nf: IDLETIMER: Adds the uid field in the msg Message notifications contains an additional uid field. This field represents the uid that was responsible for waking the radio. And hence it is present only in notifications stating that the radio is now active. Change-Id: I18fc73eada512e370d7ab24fc9f890845037b729 Signed-off-by: Ruchi Kandoi Bug: 20264396 --- net/netfilter/xt_IDLETIMER.c | 37 +++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 2e08868505d1..48d4cb75c399 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -48,6 +48,7 @@ #include #include #include +#include struct idletimer_tg_attr { struct attribute attr; @@ -73,6 +74,7 @@ struct idletimer_tg { bool work_pending; bool send_nl_msg; bool active; + uid_t uid; }; static LIST_HEAD(idletimer_tg_list); @@ -117,7 +119,8 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) char iface_msg[NLMSG_MAX_SIZE]; char state_msg[NLMSG_MAX_SIZE]; char timestamp_msg[NLMSG_MAX_SIZE]; - char *envp[] = { iface_msg, state_msg, timestamp_msg, NULL }; + char uid_msg[NLMSG_MAX_SIZE]; + char *envp[] = { iface_msg, state_msg, timestamp_msg, uid_msg, NULL }; int res; struct timespec ts; uint64_t time_ns; @@ -140,6 +143,16 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) return; } + if (state) { + res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID=%u", timer->uid); + if (NLMSG_MAX_SIZE <= res) + pr_err("message too long (%d)", res); + } else { + res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID="); + if (NLMSG_MAX_SIZE <= res) + pr_err("message too long (%d)", res); + } + time_ns = timespec_to_ns(&ts); res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns); if (NLMSG_MAX_SIZE <= res) { @@ -147,7 +160,8 @@ static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) pr_err("message too long (%d)", res); } - pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); + pr_debug("putting nlmsg: <%s> <%s> <%s> <%s>\n", iface_msg, state_msg, + timestamp_msg, uid_msg); kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); return; @@ -299,6 +313,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) info->timer->delayed_timer_trigger.tv_sec = 0; info->timer->delayed_timer_trigger.tv_nsec = 0; info->timer->work_pending = false; + info->timer->uid = 0; get_monotonic_boottime(&info->timer->last_modified_timer); info->timer->pm_nb.notifier_call = idletimer_resume; @@ -322,7 +337,8 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) return ret; } -static void reset_timer(const struct idletimer_tg_info *info) +static void reset_timer(const struct idletimer_tg_info *info, + struct sk_buff *skb) { unsigned long now = jiffies; struct idletimer_tg *timer = info->timer; @@ -335,6 +351,17 @@ static void reset_timer(const struct idletimer_tg_info *info) if (!timer_prev || time_before(timer->timer.expires, now)) { pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", timer->timer.expires, now); + + /* Stores the uid resposible for waking up the radio */ + if (skb && (skb->sk)) { + struct sock *sk = skb->sk; + read_lock_bh(&sk->sk_callback_lock); + if ((sk->sk_socket) && (sk->sk_socket->file) && + (sk->sk_socket->file->f_cred)) + timer->uid = sk->sk_socket->file->f_cred->uid; + read_unlock_bh(&sk->sk_callback_lock); + } + /* checks if there is a pending inactive notification*/ if (timer->work_pending) timer->delayed_timer_trigger = timer->last_modified_timer; @@ -373,7 +400,7 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, } /* TODO: Avoid modifying timers on each packet */ - reset_timer(info); + reset_timer(info, skb); return XT_CONTINUE; } @@ -401,7 +428,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; - reset_timer(info); + reset_timer(info, NULL); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { -- GitLab From 22ea73dee0367b7655e64ae082e68ede82a28b53 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 11 May 2015 14:39:59 +0530 Subject: [PATCH 0665/1442] ANDROID: nf: IDLETIMER: Fix broken uid field in the msg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create uid from kuid to fix the broken uid field in the message notifications introduced in Change-Id: I18fc73eada512e370d7ab24fc9f890845037b729, Otherwise we run into following build error: --------------- CC net/netfilter/xt_IDLETIMER.o net/netfilter/xt_IDLETIMER.c: In function ‘reset_timer’: net/netfilter/xt_IDLETIMER.c:360:16: error: incompatible types when assigning to type ‘uid_t’ from type ‘kuid_t’ make[2]: *** [net/netfilter/xt_IDLETIMER.o] Error 1 --------------- Signed-off-by: Amit Pundir (cherry picked from commit 706060ba3e1dee9ec3c4a4a1480d663b6cd71cad) Change-Id: Ifd66df45a58d1a5a60c3816c373ee3008292eee8 --- net/netfilter/xt_IDLETIMER.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 48d4cb75c399..4a2d853a90b2 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -354,12 +354,8 @@ static void reset_timer(const struct idletimer_tg_info *info, /* Stores the uid resposible for waking up the radio */ if (skb && (skb->sk)) { - struct sock *sk = skb->sk; - read_lock_bh(&sk->sk_callback_lock); - if ((sk->sk_socket) && (sk->sk_socket->file) && - (sk->sk_socket->file->f_cred)) - timer->uid = sk->sk_socket->file->f_cred->uid; - read_unlock_bh(&sk->sk_callback_lock); + timer->uid = from_kuid_munged(current_user_ns(), + sock_i_uid(skb->sk)); } /* checks if there is a pending inactive notification*/ -- GitLab From 6b1064db6e31027e98e4c243ffcad57c6b2ac7ae Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 26 Mar 2014 19:35:41 +0900 Subject: [PATCH 0666/1442] ANDROID: net: ipv6: autoconf routes into per-device tables Currently, IPv6 router discovery always puts routes into RT6_TABLE_MAIN. This causes problems for connection managers that want to support multiple simultaneous network connections and want control over which one is used by default (e.g., wifi and wired). To work around this connection managers typically take the routes they prefer and copy them to static routes with low metrics in the main table. This puts the burden on the connection manager to watch netlink to see if the routes have changed, delete the routes when their lifetime expires, etc. Instead, this patch adds a per-interface sysctl to have the kernel put autoconf routes into different tables. This allows each interface to have its own autoconf table, and choosing the default interface (or using different interfaces at the same time for different types of traffic) can be done using appropriate ip rules. The sysctl behaves as follows: - = 0: default. Put routes into RT6_TABLE_MAIN as before. - > 0: manual. Put routes into the specified table. - < 0: automatic. Add the absolute value of the sysctl to the device's ifindex, and use that table. The automatic mode is most useful in conjunction with net.ipv6.conf.default.accept_ra_rt_table. A connection manager or distribution could set it to, say, -100 on boot, and thereafter just use IP rules. Change-Id: I82d16e3737d9cdfa6489e649e247894d0d60cbb1 Signed-off-by: Lorenzo Colitti [AmitP: Refactored original changes to align with the changes introduced by upstream commit 830218c1add1 ("net: ipv6: Fix processing of RAs in presence of VRF")] Signed-off-by: Amit Pundir --- include/linux/ipv6.h | 1 + include/net/addrconf.h | 2 ++ include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 39 +++++++++++++++++++++++++++++-- net/ipv6/route.c | 49 +++++++++------------------------------ 5 files changed, 52 insertions(+), 40 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a0649973ee5b..7aebe23acb55 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -40,6 +40,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 accept_ra_rt_table; __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 8f998afc1384..e1bd2bc0277a 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -242,6 +242,8 @@ static inline bool ipv6_is_mld(struct sk_buff *skb, int nexthdr, int offset) void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao); +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table); + /* * anycast prototypes (anycast.c) */ diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8c2772340c3f..1049c7828141 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -164,6 +164,7 @@ enum { DEVCONF_ACCEPT_DAD, DEVCONF_FORCE_TLLAO, DEVCONF_NDISC_NOTIFY, + DEVCONF_ACCEPT_RA_RT_TABLE, DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL, DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL, DEVCONF_SUPPRESS_FRAG_NDISC, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4bc5ba3ae452..92aebe11d172 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -226,6 +226,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -272,6 +273,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -2200,6 +2202,31 @@ static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad ipv6_regen_rndid(idev); } +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table) { + /* Determines into what table to put autoconf PIO/RIO/default routes + * learned on this device. + * + * - If 0, use the same table for every device. This puts routes into + * one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route + * (but note that these three are currently all equal to + * RT6_TABLE_MAIN). + * - If > 0, use the specified table. + * - If < 0, put routes into table dev->ifindex + (-rt_table). + */ + struct inet6_dev *idev = in6_dev_get(dev); + u32 table; + int sysctl = idev->cnf.accept_ra_rt_table; + if (sysctl == 0) { + table = default_table; + } else if (sysctl > 0) { + table = (u32) sysctl; + } else { + table = (unsigned) dev->ifindex + (-sysctl); + } + in6_dev_put(idev); + return table; +} + /* * Add prefix route. */ @@ -2209,7 +2236,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX, + .fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX), .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -2242,7 +2269,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; + u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX); table = fib6_get_table(dev_net(dev), tb_id); if (!table) @@ -4928,6 +4955,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table; array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -5910,6 +5938,13 @@ static const struct ctl_table addrconf_sysctl[] = { }, #endif #endif + { + .procname = "accept_ra_rt_table", + .data = &ipv6_devconf.accept_ra_rt_table, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1b57e11e6e0d..0b0cdb16bcd1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2335,7 +2335,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev) { - u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; + u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO); int ifindex = dev->ifindex; struct fib6_node *fn; struct rt6_info *rt = NULL; @@ -2351,7 +2351,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - if (rt->dst.dev->ifindex != ifindex) + if (rt->dst.dev->ifindex != dev->ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; @@ -2382,7 +2382,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, + cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO), cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2398,7 +2398,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) { - u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; + u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_MAIN); struct rt6_info *rt; struct fib6_table *table; @@ -2424,7 +2424,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, + .fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT), .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -2447,43 +2447,16 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } -static void __rt6_purge_dflt_routers(struct fib6_table *table) -{ - struct rt6_info *rt; - -restart: - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && - (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt); - goto restart; - } - } - read_unlock_bh(&table->tb6_lock); - - table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; +int rt6_addrconf_purge(struct rt6_info *rt, void *arg) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) + return -1; + return 0; } void rt6_purge_dflt_routers(struct net *net) { - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) - __rt6_purge_dflt_routers(table); - } - } - - rcu_read_unlock(); + fib6_clean_all(net, rt6_addrconf_purge, NULL); } static void rtmsg_to_fib6_config(struct net *net, -- GitLab From bd1bca4efb8e5146ebe5f486cc78092da8a18827 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 28 Mar 2014 16:23:48 -0700 Subject: [PATCH 0667/1442] ANDROID: netfilter: Build fixups - kuid/kguid changes & xt_socket_get/put_sk Fix up build kuid/kguid build issues in netfilter code. Also re-add the xt_socket_get/put_sk interfaces needed by xt_qtaguid. Change-Id: I7027fb840e109785bddffe8ea717b8d018b26d82 Signed-off-by: John Stultz --- include/uapi/linux/netfilter/xt_socket.h | 6 ++ net/netfilter/xt_qtaguid.c | 121 +++++++++++++---------- net/netfilter/xt_socket.c | 16 ++- 3 files changed, 88 insertions(+), 55 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h index 87644f832494..8f4da12ca571 100644 --- a/include/uapi/linux/netfilter/xt_socket.h +++ b/include/uapi/linux/netfilter/xt_socket.h @@ -26,4 +26,10 @@ struct xt_socket_mtinfo3 { | XT_SOCKET_NOWILDCARD \ | XT_SOCKET_RESTORESKMARK) +void xt_socket_put_sk(struct sock *sk); +struct sock *xt_socket_get4_sk(const struct sk_buff *skb, + struct xt_action_param *par); +struct sock *xt_socket_get6_sk(const struct sk_buff *skb, + struct xt_action_param *par); + #endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 7c9be1706533..0ad8d7a896cf 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -145,22 +145,22 @@ static bool can_manipulate_uids(void) { /* root pwnd */ return in_egroup_p(xt_qtaguid_ctrl_file->gid) - || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited) - || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); + || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || unlikely(!proc_ctrl_write_limited) + || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid)); } -static bool can_impersonate_uid(uid_t uid) +static bool can_impersonate_uid(kuid_t uid) { - return uid == current_fsuid() || can_manipulate_uids(); + return uid_eq(uid, current_fsuid()) || can_manipulate_uids(); } -static bool can_read_other_uid_stats(uid_t uid) +static bool can_read_other_uid_stats(kuid_t uid) { /* root pwnd */ return in_egroup_p(xt_qtaguid_stats_file->gid) - || unlikely(!current_fsuid()) || uid == current_fsuid() + || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || uid_eq(uid, current_fsuid()) || unlikely(!proc_stats_readall_limited) - || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid); + || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid)); } static inline void dc_add_byte_packets(struct data_counters *counters, int set, @@ -542,7 +542,7 @@ static void put_utd_entry(struct uid_tag_data *utd_entry) "erase utd_entry=%p uid=%u " "by pid=%u tgid=%u uid=%u\n", __func__, utd_entry, utd_entry->uid, - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); BUG_ON(utd_entry->num_active_tags); rb_erase(&utd_entry->node, &uid_tag_data_tree); kfree(utd_entry); @@ -744,7 +744,7 @@ static int iface_stat_fmt_proc_show(struct seq_file *m, void *v) CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n", - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); iface_entry = list_entry(v, struct iface_stat, list); @@ -1656,7 +1656,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct file *filp; bool got_sock = false; struct sock *sk; - uid_t sock_uid; + kuid_t sock_uid; bool res; if (unlikely(module_passive)) @@ -1720,7 +1720,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); filp = sk->sk_socket ? sk->sk_socket->file : NULL; MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", - par->hooknum, filp ? filp->f_cred->fsuid : -1); + par->hooknum, filp ? from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1); } if (sk == NULL || sk->sk_socket == NULL) { @@ -1761,7 +1761,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) * For now we only do iface stats when the uid-owner is not requested */ if (!(info->match & XT_QTAGUID_UID)) - account_for_uid(skb, sk, sock_uid, par); + account_for_uid(skb, sk, from_kuid(&init_user_ns, sock_uid), par); /* * The following two tests fail the match when: @@ -1769,25 +1769,32 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) * or id in range AND inverted condition requested * Thus (!a && b) || (a && !b) == a ^ b */ - if (info->match & XT_QTAGUID_UID) - if ((filp->f_cred->fsuid >= info->uid_min && - filp->f_cred->fsuid <= info->uid_max) ^ + if (info->match & XT_QTAGUID_UID) { + kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); + kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); + + if (uid_gte(filp->f_cred->fsuid, uid_min) && + uid_lte(filp->f_cred->fsuid, uid_max) ^ !(info->invert & XT_QTAGUID_UID)) { MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", par->hooknum); res = false; goto put_sock_ret_res; } - if (info->match & XT_QTAGUID_GID) - if ((filp->f_cred->fsgid >= info->gid_min && - filp->f_cred->fsgid <= info->gid_max) ^ + } + if (info->match & XT_QTAGUID_GID) { + kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); + kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); + + if (gid_gte(filp->f_cred->fsgid, gid_min) && + gid_lte(filp->f_cred->fsgid, gid_max) ^ !(info->invert & XT_QTAGUID_GID)) { MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", par->hooknum); res = false; goto put_sock_ret_res; } - + } MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); res = true; @@ -1919,7 +1926,7 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) long f_count; CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n", - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); if (sock_tag_entry != SEQ_START_TOKEN) { uid = get_uid_from_tag(sock_tag_entry->tag); @@ -1977,7 +1984,8 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) static int ctrl_cmd_delete(const char *input) { char cmd; - uid_t uid; + int uid_int; + kuid_t uid; uid_t entry_uid; tag_t acct_tag; tag_t tag; @@ -1991,10 +1999,11 @@ static int ctrl_cmd_delete(const char *input) struct tag_ref *tr_entry; struct uid_tag_data *utd_entry; - argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); + argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid_int); + uid = make_kuid(&init_user_ns, uid_int); CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " "user_tag=0x%llx uid=%u\n", input, argc, cmd, - acct_tag, uid); + acct_tag, uid_int); if (argc < 2) { res = -EINVAL; goto err; @@ -2006,18 +2015,19 @@ static int ctrl_cmd_delete(const char *input) } if (argc < 3) { uid = current_fsuid(); + uid_int = from_kuid(&init_user_ns, uid); } else if (!can_impersonate_uid(uid)) { pr_info("qtaguid: ctrl_delete(%s): " "insufficient priv from pid=%u tgid=%u uid=%u\n", - input, current->pid, current->tgid, current_fsuid()); + input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); res = -EPERM; goto err; } - tag = combine_atag_with_uid(acct_tag, uid); + tag = combine_atag_with_uid(acct_tag, uid_int); CT_DEBUG("qtaguid: ctrl_delete(%s): " "looking for tag=0x%llx (uid=%u)\n", - input, tag, uid); + input, tag, uid_int); /* Delete socket tags */ spin_lock_bh(&sock_tag_list_lock); @@ -2026,7 +2036,7 @@ static int ctrl_cmd_delete(const char *input) st_entry = rb_entry(node, struct sock_tag, sock_node); entry_uid = get_uid_from_tag(st_entry->tag); node = rb_next(node); - if (entry_uid != uid) + if (entry_uid != uid_int) continue; CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", @@ -2087,7 +2097,7 @@ static int ctrl_cmd_delete(const char *input) "ts tag=0x%llx (uid=%u)\n", input, ts_entry->tn.tag, entry_uid); - if (entry_uid != uid) + if (entry_uid != uid_int) continue; if (!acct_tag || ts_entry->tn.tag == tag) { CT_DEBUG("qtaguid: ctrl_delete(%s): " @@ -2116,7 +2126,7 @@ static int ctrl_cmd_delete(const char *input) "utd uid=%u\n", input, entry_uid); - if (entry_uid != uid) + if (entry_uid != uid_int) continue; /* * Go over the tag_refs, and those that don't have @@ -2160,7 +2170,7 @@ static int ctrl_cmd_counter_set(const char *input) if (!can_manipulate_uids()) { pr_info("qtaguid: ctrl_counterset(%s): " "insufficient priv from pid=%u tgid=%u uid=%u\n", - input, current->pid, current->tgid, current_fsuid()); + input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); res = -EPERM; goto err; } @@ -2197,7 +2207,8 @@ static int ctrl_cmd_tag(const char *input) { char cmd; int sock_fd = 0; - uid_t uid = 0; + kuid_t uid; + unsigned int uid_int = 0; tag_t acct_tag = make_atag_from_value(0); tag_t full_tag; struct socket *el_socket; @@ -2208,10 +2219,11 @@ static int ctrl_cmd_tag(const char *input) struct proc_qtu_data *pqd_entry; /* Unassigned args will get defaulted later. */ - argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); + argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid_int); + uid = make_kuid(&init_user_ns, uid_int); CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, - acct_tag, uid); + acct_tag, uid_int); if (argc < 2) { res = -EINVAL; goto err; @@ -2221,7 +2233,7 @@ static int ctrl_cmd_tag(const char *input) pr_info("qtaguid: ctrl_tag(%s): failed to lookup" " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", input, sock_fd, res, current->pid, current->tgid, - current_fsuid()); + from_kuid(&init_user_ns, current_fsuid())); goto err; } CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", @@ -2237,21 +2249,24 @@ static int ctrl_cmd_tag(const char *input) CT_DEBUG("qtaguid: ctrl_tag(%s): " "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " "ctrl.gid=%u in_group()=%d in_egroup()=%d\n", - input, current->pid, current->tgid, current_uid(), - current_euid(), current_fsuid(), - xt_qtaguid_ctrl_file->gid, + input, current->pid, current->tgid, + from_kuid(&init_user_ns, current_uid()), + from_kuid(&init_user_ns, current_euid()), + from_kuid(&init_user_ns, current_fsuid()), + from_kgid(&init_user_ns, xt_qtaguid_ctrl_file->gid), in_group_p(xt_qtaguid_ctrl_file->gid), in_egroup_p(xt_qtaguid_ctrl_file->gid)); if (argc < 4) { uid = current_fsuid(); + uid_int = from_kuid(&init_user_ns, uid); } else if (!can_impersonate_uid(uid)) { pr_info("qtaguid: ctrl_tag(%s): " "insufficient priv from pid=%u tgid=%u uid=%u\n", - input, current->pid, current->tgid, current_fsuid()); + input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); res = -EPERM; goto err_put; } - full_tag = combine_atag_with_uid(acct_tag, uid); + full_tag = combine_atag_with_uid(acct_tag, uid_int); spin_lock_bh(&sock_tag_list_lock); sock_tag_entry = get_sock_stat_nl(el_socket->sk); @@ -2298,8 +2313,7 @@ static int ctrl_cmd_tag(const char *input) sock_tag_entry->sk = el_socket->sk; sock_tag_entry->socket = el_socket; sock_tag_entry->pid = current->tgid; - sock_tag_entry->tag = combine_atag_with_uid(acct_tag, - uid); + sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int); spin_lock_bh(&uid_tag_data_tree_lock); pqd_entry = proc_qtu_data_tree_search( &proc_qtu_data_tree, current->tgid); @@ -2314,7 +2328,7 @@ static int ctrl_cmd_tag(const char *input) "User space forgot to open /dev/xt_qtaguid? " "pid=%u tgid=%u uid=%u\n", __func__, current->pid, current->tgid, - current_fsuid()); + from_kuid(&init_user_ns, current_fsuid())); else list_add(&sock_tag_entry->list, &pqd_entry->sock_tag_list); @@ -2369,7 +2383,7 @@ static int ctrl_cmd_untag(const char *input) pr_info("qtaguid: ctrl_untag(%s): failed to lookup" " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n", input, sock_fd, res, current->pid, current->tgid, - current_fsuid()); + from_kuid(&init_user_ns, current_fsuid())); goto err; } CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", @@ -2403,7 +2417,7 @@ static int ctrl_cmd_untag(const char *input) pr_warn_once("qtaguid: %s(): " "User space forgot to open /dev/xt_qtaguid? " "pid=%u tgid=%u uid=%u\n", __func__, - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); else list_del(&sock_tag_entry->list); spin_unlock_bh(&uid_tag_data_tree_lock); @@ -2446,7 +2460,7 @@ static ssize_t qtaguid_ctrl_parse(const char *input, size_t count) ssize_t res; CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n", - input, current->pid, current->tgid, current_fsuid()); + input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); cmd = input[0]; /* Collect params for commands */ @@ -2528,14 +2542,15 @@ static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry, uid_t stat_uid = get_uid_from_tag(tag); struct proc_print_info *ppi = m->private; /* Detailed tags are not available to everybody */ - if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) { + if (get_atag_from_tag(tag) && !can_read_other_uid_stats( + make_kuid(&init_user_ns,stat_uid))) { CT_DEBUG("qtaguid: stats line: " "%s 0x%llx %u: insufficient priv " "from pid=%u tgid=%u uid=%u stats.gid=%u\n", ppi->iface_entry->ifname, get_atag_from_tag(tag), stat_uid, - current->pid, current->tgid, current_fsuid(), - xt_qtaguid_stats_file->gid); + current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()), + from_kgid(&init_user_ns,xt_qtaguid_stats_file->gid)); return 0; } ppi->item_index++; @@ -2737,12 +2752,12 @@ static int qtudev_open(struct inode *inode, struct file *file) return 0; DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); spin_lock_bh(&uid_tag_data_tree_lock); /* Look for existing uid data, or alloc one. */ - utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); + utd_entry = get_uid_data(from_kuid(&init_user_ns, current_fsuid()), &utd_entry_found); if (IS_ERR_OR_NULL(utd_entry)) { res = PTR_ERR(utd_entry); goto err_unlock; @@ -2754,7 +2769,7 @@ static int qtudev_open(struct inode *inode, struct file *file) if (pqd_entry) { pr_err("qtaguid: qtudev_open(): %u/%u %u " "%s already opened\n", - current->pid, current->tgid, current_fsuid(), + current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()), QTU_DEV_NAME); res = -EBUSY; goto err_unlock_free_utd; @@ -2764,7 +2779,7 @@ static int qtudev_open(struct inode *inode, struct file *file) if (!new_pqd_entry) { pr_err("qtaguid: qtudev_open(): %u/%u %u: " "proc data alloc failed\n", - current->pid, current->tgid, current_fsuid()); + current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid())); res = -ENOMEM; goto err_unlock_free_utd; } @@ -2778,7 +2793,7 @@ static int qtudev_open(struct inode *inode, struct file *file) spin_unlock_bh(&uid_tag_data_tree_lock); DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", - current_fsuid(), new_pqd_entry); + from_kuid(&init_user_ns, current_fsuid()), new_pqd_entry); file->private_data = new_pqd_entry; return 0; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index b10ade272b50..45346438b250 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,6 +35,16 @@ #include #endif +void +xt_socket_put_sk(struct sock *sk) +{ + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} +EXPORT_SYMBOL(xt_socket_put_sk); + static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, @@ -144,7 +154,7 @@ static bool xt_socket_sk_is_transparent(struct sock *sk) } } -static struct sock *xt_socket_lookup_slow_v4(struct net *net, +struct sock *xt_socket_lookup_slow_v4(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { @@ -208,6 +218,7 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net, return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, daddr, sport, dport, indev); } +EXPORT_SYMBOL(xt_socket_lookup_slow_v4); static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, @@ -344,7 +355,7 @@ xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, return NULL; } -static struct sock *xt_socket_lookup_slow_v6(struct net *net, +struct sock *xt_socket_lookup_slow_v6(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { @@ -390,6 +401,7 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net, return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, sport, dport, indev); } +EXPORT_SYMBOL(xt_socket_lookup_slow_v6); static bool socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) -- GitLab From 070eff8f023c8102da06566aa3724aa414c2e7ae Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 20 Jan 2015 16:13:08 +0530 Subject: [PATCH 0668/1442] ANDROID: xt_qtaguid: fix broken uid/gid range check The existing test to check if current uid/gid is within valid range is broken due to missing parenthesis. Change-Id: I889ebbd0e2ea6a9426cb1509a2975e7107666407 Signed-off-by: Amit Pundir Signed-off-by: John Stultz --- net/netfilter/xt_qtaguid.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 0ad8d7a896cf..9664bec1091c 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1773,8 +1773,8 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); - if (uid_gte(filp->f_cred->fsuid, uid_min) && - uid_lte(filp->f_cred->fsuid, uid_max) ^ + if ((uid_gte(filp->f_cred->fsuid, uid_min) && + uid_lte(filp->f_cred->fsuid, uid_max)) ^ !(info->invert & XT_QTAGUID_UID)) { MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", par->hooknum); @@ -1786,8 +1786,8 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); - if (gid_gte(filp->f_cred->fsgid, gid_min) && - gid_lte(filp->f_cred->fsgid, gid_max) ^ + if ((gid_gte(filp->f_cred->fsgid, gid_min) && + gid_lte(filp->f_cred->fsgid, gid_max)) ^ !(info->invert & XT_QTAGUID_GID)) { MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", par->hooknum); -- GitLab From e8101fd0e91139a7d6ad19802c902c51be937dd2 Mon Sep 17 00:00:00 2001 From: Mohamad Ayyash Date: Tue, 13 Jan 2015 19:20:44 -0800 Subject: [PATCH 0669/1442] ANDROID: xt_qtaguid: Use sk_callback_lock read locks before reading sk->sk_socket It prevents a kernel panic when accessing sk->sk_socket fields due to NULLing sk->sk_socket when sock_orphan is called through sk_common_release. Change-Id: I4aa46b4e2d8600e4d4ef8dcdd363aa4e6e5f8433 Signed-off-by: Mohamad Ayyash (cherry picked from commit cdea0ebcb8bcfe57688f6cb692b49e550ebd9796) Signed-off-by: John Stultz --- net/netfilter/xt_qtaguid.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 9664bec1091c..e33be3aaf094 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1658,6 +1658,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk; kuid_t sock_uid; bool res; + bool set_sk_callback_lock = false; if (unlikely(module_passive)) return (info->match ^ info->invert) == 0; @@ -1715,6 +1716,8 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n", par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par)); if (sk != NULL) { + set_sk_callback_lock = true; + read_lock_bh(&sk->sk_callback_lock); MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", par->hooknum, sk, sk->sk_socket, sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); @@ -1801,6 +1804,8 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) put_sock_ret_res: if (got_sock) xt_socket_put_sk(sk); + if (set_sk_callback_lock) + read_unlock_bh(&sk->sk_callback_lock); ret_res: MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); return res; -- GitLab From 2879b6ec24ee1734b31a306987b9e122a67b7643 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 29 Jan 2015 01:16:23 +0530 Subject: [PATCH 0670/1442] ANDROID: xt_qtaguid: use sock_gen_put() instead of xt_socket_put_sk() Removing obsolete xt_socket_put_sk() and using sock_gen_put() instead. xt_socket_put_sk() was reintroduced for xt_qtaguid in one of the patches, but it turned out sock_gen_put() supersedes xt_socket_put_sk(). So we don't need xt_socket_put_sk() any more. This patch is based on commit 1a8bf6eeef9f (netfilter: xt_socket: use sock_gen_put()) Change-Id: I976d5f7f7eded0f3cc91b596acfeb35e4c2057e5 Signed-off-by: Amit Pundir (cherry picked from commit 551780fc28cb7480dbc4f585ef80ca02c2922ec1) Signed-off-by: John Stultz --- include/uapi/linux/netfilter/xt_socket.h | 1 - net/netfilter/xt_qtaguid.c | 4 ++-- net/netfilter/xt_socket.c | 10 ---------- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h index 8f4da12ca571..ac645e69716f 100644 --- a/include/uapi/linux/netfilter/xt_socket.h +++ b/include/uapi/linux/netfilter/xt_socket.h @@ -26,7 +26,6 @@ struct xt_socket_mtinfo3 { | XT_SOCKET_NOWILDCARD \ | XT_SOCKET_RESTORESKMARK) -void xt_socket_put_sk(struct sock *sk); struct sock *xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par); struct sock *xt_socket_get6_sk(const struct sk_buff *skb, diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index e33be3aaf094..2f9784c1e692 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1605,7 +1605,7 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, * "struct inet_timewait_sock" which is missing fields. */ if (sk->sk_state == TCP_TIME_WAIT) { - xt_socket_put_sk(sk); + sock_gen_put(sk); sk = NULL; } } @@ -1803,7 +1803,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) put_sock_ret_res: if (got_sock) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (set_sk_callback_lock) read_unlock_bh(&sk->sk_callback_lock); ret_res: diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 45346438b250..69775b49e485 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,16 +35,6 @@ #include #endif -void -xt_socket_put_sk(struct sock *sk) -{ - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); -} -EXPORT_SYMBOL(xt_socket_put_sk); - static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, -- GitLab From 20e85a204318325111d2fc44b15be81ebb8589ce Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 3 Sep 2015 14:48:52 -0700 Subject: [PATCH 0671/1442] ANDROID: net: xt_qtaguid/xt_socket: fix refcount underflow and crash xt_socket_get[4|6]_sk() do not always increment sock refcount, which causes confusion in xt_qtaguid module which is not aware of this fact and drops the reference whether it should have or not. Fix it by changing xt_socket_get[4|6]_sk() to always increment recount of returned sock. This should fix the following crash: [ 111.319523] BUG: failure at /mnt/host/source/src/third_party/kernel/v3.18/net/ipv4/inet_timewait_sock.c:90/__inet_twsk_kill()! [ 111.331192] Kernel panic - not syncing: BUG! [ 111.335468] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U W 3.18.0-06867-g268df91 #1 [ 111.343810] Hardware name: Google Tegra210 Smaug Rev 1+ (DT) [ 111.349463] Call trace: [ 111.351917] [] dump_backtrace+0x0/0x10c [ 111.357314] [] show_stack+0x10/0x1c [ 111.362367] [] dump_stack+0x74/0x94 [ 111.367414] [] panic+0xec/0x238 [ 111.372116] [] __inet_twsk_kill+0xd0/0xf8 [ 111.377684] [] inet_twdr_do_twkill_work+0x64/0xd0 [ 111.383946] [] inet_twdr_hangman+0x2c/0xa4 [ 111.389602] [] call_timer_fn+0xac/0x160 [ 111.394995] [] run_timer_softirq+0x23c/0x274 [ 111.400824] [] __do_softirq+0x1a4/0x330 [ 111.406218] [] irq_exit+0x70/0xd0 [ 111.411093] [] __handle_domain_irq+0x84/0xa8 [ 111.416922] [] gic_handle_irq+0x4c/0x80 b/22476945 Originally reviewed at: https://chromium-review.googlesource.com/#/c/297414/ Change-Id: I51fa94a9d92a84a0bd3b58466d711e46a6892a79 Signed-off-by: Dmitry Torokhov [jstultz: Cherry-picked and added missing local var definition] Signed-off-by: John Stultz --- net/netfilter/xt_socket.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 69775b49e485..a52fbaf52691 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -151,6 +151,7 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net, const struct iphdr *iph = ip_hdr(skb); struct sk_buff *data_skb = NULL; int doff = 0; + struct sock *sk = skb->sk; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -205,8 +206,14 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net, } #endif - return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, - daddr, sport, dport, indev); + if (sk) + atomic_inc(&sk->sk_refcnt); + else + sk = xt_socket_get_sock_v4(dev_net(skb->dev), data_skb, doff, + protocol, saddr, daddr, sport, + dport, indev); + + return sk; } EXPORT_SYMBOL(xt_socket_lookup_slow_v4); @@ -240,8 +247,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, transparent) pskb->mark = sk->sk_mark; - if (sk != skb->sk) - sock_gen_put(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; @@ -349,6 +355,7 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { + struct sock *sk = skb->sk; __be16 uninitialized_var(dport), uninitialized_var(sport); const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb); @@ -388,8 +395,14 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net, return NULL; } - return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, - sport, dport, indev); + if (sk) + atomic_inc(&sk->sk_refcnt); + else + sk = xt_socket_get_sock_v6(dev_net(skb->dev), data_skb, doff, + tproto, saddr, daddr, sport, dport, + indev); + + return sk; } EXPORT_SYMBOL(xt_socket_lookup_slow_v6); -- GitLab From 6e2b405681d25dd5352637373659aa1f39211fa8 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 7 Jul 2015 00:28:49 +0530 Subject: [PATCH 0672/1442] ANDROID: netfilter: xt_qtaguid: xt_socket: build fixes Add missing header and use xt_socket_lookup_slow_v* instead of xt_socket_get*_sk in xt_qtaguid.c. Fix xt_socket_lookup_slow_v* functions in xt_socket.c and declare them in xt_socket.h Change-Id: I55819b2d4ffa82a2be20995c87d28fb5cc77b5ba Signed-off-by: Amit Pundir Signed-off-by: John Stultz --- include/uapi/linux/netfilter/xt_socket.h | 8 ++++---- net/netfilter/xt_qtaguid.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h index ac645e69716f..15b4c1cf99bb 100644 --- a/include/uapi/linux/netfilter/xt_socket.h +++ b/include/uapi/linux/netfilter/xt_socket.h @@ -26,9 +26,9 @@ struct xt_socket_mtinfo3 { | XT_SOCKET_NOWILDCARD \ | XT_SOCKET_RESTORESKMARK) -struct sock *xt_socket_get4_sk(const struct sk_buff *skb, - struct xt_action_param *par); -struct sock *xt_socket_get6_sk(const struct sk_buff *skb, - struct xt_action_param *par); +struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, + const struct net_device *indev); +struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, + const struct net_device *indev); #endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 2f9784c1e692..90b2c6aac7d8 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1588,10 +1589,10 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, switch (par->family) { case NFPROTO_IPV6: - sk = xt_socket_get6_sk(skb, par); + sk = xt_socket_lookup_slow_v6(skb, par->in); break; case NFPROTO_IPV4: - sk = xt_socket_get4_sk(skb, par); + sk = xt_socket_lookup_slow_v4(skb, par->in); break; default: return NULL; -- GitLab From 7de1bb86dc5ab1b727a72cd773cc825b459685ec Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 20 Nov 2015 14:45:40 +0530 Subject: [PATCH 0673/1442] ANDROID: netfilter: xt_qtaguid/socket: build fixes for 4.4 Update xt_socket_lookup_slow_v* usage in aosp patches, to align with changes from mainline commit 686c9b50809d "netfilter: x_tables: Use par->net instead of computing from the passed net devices". Signed-off-by: Amit Pundir --- include/uapi/linux/netfilter/xt_socket.h | 10 ++++++---- net/netfilter/xt_qtaguid.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h index 15b4c1cf99bb..7f00df6cd897 100644 --- a/include/uapi/linux/netfilter/xt_socket.h +++ b/include/uapi/linux/netfilter/xt_socket.h @@ -26,9 +26,11 @@ struct xt_socket_mtinfo3 { | XT_SOCKET_NOWILDCARD \ | XT_SOCKET_RESTORESKMARK) -struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, - const struct net_device *indev); -struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, - const struct net_device *indev); +struct sock *xt_socket_lookup_slow_v4(struct net *net, + const struct sk_buff *skb, + const struct net_device *indev); +struct sock *xt_socket_lookup_slow_v6(struct net *net, + const struct sk_buff *skb, + const struct net_device *indev); #endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 90b2c6aac7d8..62ddd6cd1ee8 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1589,10 +1589,10 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, switch (par->family) { case NFPROTO_IPV6: - sk = xt_socket_lookup_slow_v6(skb, par->in); + sk = xt_socket_lookup_slow_v6(dev_net(skb->dev), skb, par->in); break; case NFPROTO_IPV4: - sk = xt_socket_lookup_slow_v4(skb, par->in); + sk = xt_socket_lookup_slow_v4(dev_net(skb->dev), skb, par->in); break; default: return NULL; -- GitLab From f84e6a1ce4e90f629fb0cbbc0fe419c6a84072a3 Mon Sep 17 00:00:00 2001 From: "liping.zhang" Date: Mon, 11 Jan 2016 13:31:01 +0800 Subject: [PATCH 0674/1442] ANDROID: xt_qtaguid: fix a race condition in if_tag_stat_update Miss a lock protection in if_tag_stat_update while doing get_iface_entry. So if one CPU is doing iface_stat_create while another CPU is doing if_tag_stat_update, race will happened. Change-Id: Ib8d98e542f4e385685499f5b7bb7354f08654a75 Signed-off-by: Liping Zhang --- net/netfilter/xt_qtaguid.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 62ddd6cd1ee8..04bb081adde8 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1291,11 +1291,12 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", ifname, uid, sk, direction, proto, bytes); - + spin_lock_bh(&iface_stat_list_lock); iface_entry = get_iface_entry(ifname); if (!iface_entry) { pr_err_ratelimited("qtaguid: iface_stat: stat_update() " "%s not found\n", ifname); + spin_unlock_bh(&iface_stat_list_lock); return; } /* It is ok to process data when an iface_entry is inactive */ @@ -1331,8 +1332,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, * {0, uid_tag} will also get updated. */ tag_stat_update(tag_stat_entry, direction, proto, bytes); - spin_unlock_bh(&iface_entry->tag_stat_list_lock); - return; + goto unlock; } /* Loop over tag list under this interface for {0,uid_tag} */ @@ -1372,6 +1372,7 @@ static void if_tag_stat_update(const char *ifname, uid_t uid, tag_stat_update(new_tag_stat, direction, proto, bytes); unlock: spin_unlock_bh(&iface_entry->tag_stat_list_lock); + spin_unlock_bh(&iface_stat_list_lock); } static int iface_netdev_event_handler(struct notifier_block *nb, -- GitLab From 7220b97d37056204b5b97c275e90c1d0898cc7e6 Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Fri, 7 Feb 2014 18:40:10 -0800 Subject: [PATCH 0675/1442] ANDROID: tcp: add a sysctl to config the tcp_default_init_rwnd The default initial rwnd is hardcoded to 10. Now we allow it to be controlled via /proc/sys/net/ipv4/tcp_default_init_rwnd which limits the values from 3 to 100 This is somewhat needed because ipv6 routes are autoconfigured by the kernel. See "An Argument for Increasing TCP's Initial Congestion Window" in https://developers.google.com/speed/articles/tcp_initcwnd_paper.pdf Change-Id: I386b2a9d62de0ebe05c1ebe1b4bd91b314af5c54 Signed-off-by: JP Abgrall Conflicts: net/ipv4/sysctl_net_ipv4.c net/ipv4/tcp_input.c --- include/net/tcp.h | 1 + net/ipv4/sysctl_net_ipv4.c | 22 ++++++++++++++++++++++ net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_output.c | 2 +- 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 123979fe12bf..2700f9286783 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -271,6 +271,7 @@ extern int sysctl_tcp_autocorking; extern int sysctl_tcp_invalid_ratelimit; extern int sysctl_tcp_pacing_ss_ratio; extern int sysctl_tcp_pacing_ca_ratio; +extern int sysctl_tcp_default_init_rwnd; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b25de2..4ebc5378d3f7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -151,6 +151,21 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, return ret; } +/* Validate changes from /proc interface. */ +static int proc_tcp_default_init_rwnd(ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int old_value = *(int *)ctl->data; + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + int new_value = *(int *)ctl->data; + + if (write && ret == 0 && (new_value < 3 || new_value > 100)) + *(int *)ctl->data = old_value; + + return ret; +} + static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -623,6 +638,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, + { + .procname = "tcp_default_init_rwnd", + .data = &sysctl_tcp_default_init_rwnd, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tcp_default_init_rwnd + }, { .procname = "icmp_msgs_per_sec", .data = &sysctl_icmp_msgs_per_sec, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c71d49ce0c93..7fb67045fcb9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; +int sysctl_tcp_default_init_rwnd __read_mostly = TCP_INIT_CWND * 2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 896e9dfbdb5c..cd8e1898d4f4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -188,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss) * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a * limit when mss is larger than 1460. */ - u32 init_rwnd = TCP_INIT_CWND * 2; + u32 init_rwnd = sysctl_tcp_default_init_rwnd; if (mss > 1460) init_rwnd = max((1460 * init_rwnd) / mss, 2U); -- GitLab From 30efeba9a8c3427fcddeafdf2c39b5e11aaf0c05 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 26 Mar 2014 13:03:12 +0900 Subject: [PATCH 0676/1442] ANDROID: net: support marking accepting TCP sockets When using mark-based routing, sockets returned from accept() may need to be marked differently depending on the incoming connection request. This is the case, for example, if different socket marks identify different networks: a listening socket may want to accept connections from all networks, but each connection should be marked with the network that the request came in on, so that subsequent packets are sent on the correct network. This patch adds a sysctl to mark TCP sockets based on the fwmark of the incoming SYN packet. If enabled, and an unmarked socket receives a SYN, then the SYN packet's fwmark is written to the connection's inet_request_sock, and later written back to the accepted socket when the connection is established. If the socket already has a nonzero mark, then the behaviour is the same as it is today, i.e., the listening socket's fwmark is used. Black-box tested using user-mode linux: - IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the mark of the incoming SYN packet. - The socket returned by accept() is marked with the mark of the incoming SYN packet. - Tested with syncookies=1 and syncookies=2. Change-Id: I26bc1eceefd2c588d73b921865ab70e4645ade57 Signed-off-by: Lorenzo Colitti --- Documentation/networking/ip-sysctl.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 3db8c67d2c8d..da258de13bd7 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -603,6 +603,16 @@ tcp_fastopen - INTEGER Note that that additional client or server features are only effective if the basic support (0x1 and 0x2) are enabled respectively. +tcp_fwmark_accept - BOOLEAN + If set, incoming connections to listening sockets that do not have a + socket mark will set the mark of the accepting socket to the fwmark of + the incoming SYN packet. This will cause all packets on that connection + (starting from the first SYNACK) to be sent with that fwmark. The + listening socket's mark is unchanged. Listening sockets that already + have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are + unaffected. + Default: 0 + tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 127. Default value -- GitLab From 3823c8b26e6e64427898d75c4f20bb44f04ef438 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 27 Oct 2015 18:09:40 -0700 Subject: [PATCH 0677/1442] ANDROID: tcp: fix tcp_default_init_rwnd() for 4.1 Change-Id: If3ecf5f59acf379ffcc468f28434830a92b0383d Signed-off-by: Dmitry Shmidt --- net/ipv4/sysctl_net_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4ebc5378d3f7..cf7cfa45c7d2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -152,7 +152,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, } /* Validate changes from /proc interface. */ -static int proc_tcp_default_init_rwnd(ctl_table *ctl, int write, +static int proc_tcp_default_init_rwnd(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { -- GitLab From 3fc1c613eaf81dfeee021e7255582b7e33ddcec9 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Wed, 15 Apr 2015 20:29:09 +0530 Subject: [PATCH 0678/1442] ANDROID: net: ipv6: fix virtual tunneling build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IPv6 PMTUD has been updated to support UID-based routing. Pass INVALID_UID as the socket UID to ip6_update_pmtu(), otherwise we run into following build error: ---------------- CC net/ipv6/ip6_vti.o net/ipv6/ip6_vti.c: In function ‘vti6_err’: net/ipv6/ip6_vti.c:559:3: error: too few arguments to function ‘ip6_update_pmtu’ In file included from include/net/ip_tunnels.h:19:0, from net/ipv6/ip6_vti.c:44: include/net/ip6_route.h:110:6: note: declared here make[2]: *** [net/ipv6/ip6_vti.o] Error 1 ---------------- Signed-off-by: Amit Pundir --- net/ipv6/ip6_vti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c299c1e2bbf0..d58480a9215e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -610,7 +610,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); xfrm_state_put(x); return 0; -- GitLab From de6f7210e93118ce8a5aa4307d2642879c2aa0c9 Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Thu, 4 Dec 2008 17:37:05 -0800 Subject: [PATCH 0679/1442] ANDROID: rfkill: Introduce CONFIG_RFKILL_PM and use instead of CONFIG_PM to power down Some platforms do not want to power down rfkill devices on suspend. Change-Id: I62a11630521c636d54a4a02ab9037a43435925f5 Signed-off-by: Nick Pelly --- net/rfkill/Kconfig | 5 +++++ net/rfkill/core.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 868f1ad0415a..8463a6d4d508 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -10,6 +10,11 @@ menuconfig RFKILL To compile this driver as a module, choose M here: the module will be called rfkill. +config RFKILL_PM + bool "Power off on suspend" + depends on RFKILL && PM + default y + # LED trigger support config RFKILL_LEDS bool diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 884027f62783..9b4260dab2a4 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -796,7 +796,7 @@ void rfkill_resume_polling(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_resume_polling); -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_RFKILL_PM static int rfkill_suspend(struct device *dev) { struct rfkill *rfkill = to_rfkill(dev); @@ -837,7 +837,9 @@ static struct class rfkill_class = { .dev_release = rfkill_release, .dev_groups = rfkill_dev_groups, .dev_uevent = rfkill_dev_uevent, +#ifdef CONFIG_RFKILL_PM .pm = RFKILL_PM_OPS, +#endif }; bool rfkill_blocked(struct rfkill *rfkill) -- GitLab From d457781cec2c59ab66b0cd3885b880284aac626d Mon Sep 17 00:00:00 2001 From: Ashish Sharma Date: Fri, 7 Oct 2011 17:54:16 -0700 Subject: [PATCH 0680/1442] ANDROID: bridge: Have tx_bytes count headers like rx_bytes. Since rx_bytes accounting does not include Ethernet Headers in br_input.c, excluding ETH_HLEN on the transmit path for consistent measurement of packet length on both the Tx and Rx chains. The clean way would be for Rx to include the eth header, but the skb len has already been adjusted by the time the br code sees the skb. This is only a temporary workaround until we can completely ignore or cleanly fix the skb->len handling. Change-Id: I910de95a4686b2119da7f1f326e2154ef31f9972 Signed-off-by: Ashish Sharma --- net/bridge/br_device.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 89a687f3c0a3..fcaa48402aa3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -48,16 +48,17 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - u64_stats_update_begin(&brstats->syncp); - brstats->tx_packets++; - brstats->tx_bytes += skb->len; - u64_stats_update_end(&brstats->syncp); - BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + u64_stats_update_begin(&brstats->syncp); + brstats->tx_packets++; + /* Exclude ETH_HLEN from byte stats for consistency with Rx chain */ + brstats->tx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); + if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) goto out; -- GitLab From 9d0c7e69e946442641bac8eb7babfc0120af7664 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 24 Jun 2014 09:36:50 -0700 Subject: [PATCH 0681/1442] ANDROID: net: wireless: Decrease scan entry expiration to avoid stall results Change-Id: I0e23ce45d78d7c17633670973f49943a5ed6032d Signed-off-by: Dmitry Shmidt --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 35ad69fd0838..e31887810b54 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -70,7 +70,7 @@ module_param(bss_entries_limit, int, 0644); MODULE_PARM_DESC(bss_entries_limit, "limit to number of scan BSS entries (per wiphy, default 1000)"); -#define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) +#define IEEE80211_SCAN_RESULT_EXPIRE (7 * HZ) static void bss_free(struct cfg80211_internal_bss *bss) { -- GitLab From 31401634fa02629ed95c0dc004367d954772e6b1 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 18 Mar 2010 16:04:18 -0700 Subject: [PATCH 0682/1442] ANDROID: wlan: Create generic wlan platform data header Change-Id: I233559218cc40acb28b57315ea25c08a9c866725 Signed-off-by: Dmitry Shmidt --- include/linux/wlan_plat.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/wlan_plat.h diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h new file mode 100644 index 000000000000..70ee63b44ad6 --- /dev/null +++ b/include/linux/wlan_plat.h @@ -0,0 +1,25 @@ +/* include/linux/wlan_plat.h + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef _LINUX_WLAN_PLAT_H_ +#define _LINUX_WLAN_PLAT_H_ + +struct wifi_platform_data { + int (*set_power)(int val); + int (*set_reset)(int val); + int (*set_carddetect)(int val); + void *(*mem_prealloc)(int section, unsigned long size); +}; + +#endif -- GitLab From 85cb8cd410e3d630cba50cb603e3f672612465b1 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 3 Jun 2010 10:55:33 -0700 Subject: [PATCH 0683/1442] ANDROID: network: wireless: Add get_mac_addr functionality to platform Signed-off-by: Dmitry Shmidt --- include/linux/wlan_plat.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h index 70ee63b44ad6..3b1e2e054fd5 100644 --- a/include/linux/wlan_plat.h +++ b/include/linux/wlan_plat.h @@ -20,6 +20,7 @@ struct wifi_platform_data { int (*set_reset)(int val); int (*set_carddetect)(int val); void *(*mem_prealloc)(int section, unsigned long size); + int (*get_mac_addr)(unsigned char *buf); }; #endif -- GitLab From fbbbc1491c46d486a812d297e8666b7c2833b0d4 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Mon, 14 Feb 2011 16:58:48 -0800 Subject: [PATCH 0684/1442] ANDROID: net: wireless: Add get_country_code functionality to platform Signed-off-by: Dmitry Shmidt --- include/linux/wlan_plat.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h index 3b1e2e054fd5..40ec3482d1ef 100644 --- a/include/linux/wlan_plat.h +++ b/include/linux/wlan_plat.h @@ -21,6 +21,7 @@ struct wifi_platform_data { int (*set_carddetect)(int val); void *(*mem_prealloc)(int section, unsigned long size); int (*get_mac_addr)(unsigned char *buf); + void *(*get_country_code)(char *ccode); }; #endif -- GitLab From 92ed16e283e54f711368ada7ffd5bc7143c9596a Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Fri, 22 Aug 2014 14:40:18 -0700 Subject: [PATCH 0685/1442] ANDROID: Add flags parameter to get_country_code template Change-Id: Ic3f173db144a301ea104f544fc8ec723241c1d59 Signed-off-by: Dmitry Shmidt --- include/linux/wlan_plat.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h index 40ec3482d1ef..8ad2dbd0c296 100644 --- a/include/linux/wlan_plat.h +++ b/include/linux/wlan_plat.h @@ -15,13 +15,15 @@ #ifndef _LINUX_WLAN_PLAT_H_ #define _LINUX_WLAN_PLAT_H_ +#define WLAN_PLAT_NODFS_FLAG 0x01 + struct wifi_platform_data { int (*set_power)(int val); int (*set_reset)(int val); int (*set_carddetect)(int val); void *(*mem_prealloc)(int section, unsigned long size); int (*get_mac_addr)(unsigned char *buf); - void *(*get_country_code)(char *ccode); + void *(*get_country_code)(char *ccode, u32 flags); }; #endif -- GitLab From 5c34cd9c39a64239e30c4a0d274861c106bf10af Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Mon, 12 Jan 2015 13:42:05 -0800 Subject: [PATCH 0686/1442] ANDROID: wlan: Add get_wake_irq functionality Change-Id: Ic41f06c509b2e625dc9ec4131903db6920c5fd4e Signed-off-by: Dmitry Shmidt --- include/linux/wlan_plat.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h index 8ad2dbd0c296..8e8b06f1ba4a 100644 --- a/include/linux/wlan_plat.h +++ b/include/linux/wlan_plat.h @@ -23,6 +23,7 @@ struct wifi_platform_data { int (*set_carddetect)(int val); void *(*mem_prealloc)(int section, unsigned long size); int (*get_mac_addr)(unsigned char *buf); + int (*get_wake_irq)(void); void *(*get_country_code)(char *ccode, u32 flags); }; -- GitLab From 21b36ead6629ab239ea8db724f0cc3425d510477 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Fri, 1 Jul 2011 17:19:56 -0700 Subject: [PATCH 0687/1442] ANDROID: USB: OTG: Take wakelock when VBUS present Enabled by default, can disable with: echo N > /sys/module/otg_wakelock/parameters/enabled Change-Id: I34974624c52ae23490852b44c270d2f326cf6116 Signed-off-by: Todd Poynor --- drivers/usb/phy/Kconfig | 8 ++ drivers/usb/phy/Makefile | 2 +- drivers/usb/phy/otg-wakelock.c | 190 +++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 drivers/usb/phy/otg-wakelock.c diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index b9c409a18faa..63bbfb4e1be6 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -6,6 +6,14 @@ menu "USB Physical Layer drivers" config USB_PHY def_bool n +config USB_OTG_WAKELOCK + bool "Hold a wakelock when USB connected" + depends on WAKELOCK + select USB_OTG_UTILS + help + Select this to automatically hold a wakelock when USB is + connected, preventing suspend. + # # USB Transceiver Drivers # diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile index b433e5d89be4..fee55847a89c 100644 --- a/drivers/usb/phy/Makefile +++ b/drivers/usb/phy/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_USB_PHY) += phy.o obj-$(CONFIG_OF) += of.o - +obj-$(CONFIG_USB_OTG_WAKELOCK) += otg-wakelock.o # transceiver drivers, keep the list sorted obj-$(CONFIG_AB8500_USB) += phy-ab8500-usb.o diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c new file mode 100644 index 000000000000..993162674836 --- /dev/null +++ b/drivers/usb/phy/otg-wakelock.c @@ -0,0 +1,190 @@ +/* + * otg-wakelock.c + * + * Copyright (C) 2011 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include + +static bool enabled = true; +static struct otg_transceiver *otgwl_xceiv; +static struct notifier_block otgwl_nb; + +/* + * otgwl_spinlock is held while the VBUS lock is grabbed or dropped and the + * locked field is updated to match. + */ + +static DEFINE_SPINLOCK(otgwl_spinlock); + +/* + * Only one lock, but since these 3 fields are associated with each other... + */ + +struct otgwl_lock { + char name[40]; + struct wake_lock wakelock; + bool locked; +}; + +/* + * VBUS present lock. + */ + +static struct otgwl_lock vbus_lock; + +static void otgwl_grab(struct otgwl_lock *lock) +{ + if (!lock->locked) { + wake_lock(&lock->wakelock); + lock->locked = true; + } +} + +static void otgwl_drop(struct otgwl_lock *lock) +{ + if (lock->locked) { + wake_unlock(&lock->wakelock); + lock->locked = false; + } +} + +static int otgwl_otg_notifications(struct notifier_block *nb, + unsigned long event, void *unused) +{ + unsigned long irqflags; + + if (!enabled) + return NOTIFY_OK; + + spin_lock_irqsave(&otgwl_spinlock, irqflags); + + switch (event) { + case USB_EVENT_VBUS: + case USB_EVENT_ENUMERATED: + otgwl_grab(&vbus_lock); + break; + + case USB_EVENT_NONE: + case USB_EVENT_ID: + case USB_EVENT_CHARGER: + otgwl_drop(&vbus_lock); + break; + + default: + break; + } + + spin_unlock_irqrestore(&otgwl_spinlock, irqflags); + return NOTIFY_OK; +} + +static void sync_with_xceiv_state(void) +{ + if ((otgwl_xceiv->last_event == USB_EVENT_VBUS) || + (otgwl_xceiv->last_event == USB_EVENT_ENUMERATED)) + otgwl_grab(&vbus_lock); + else + otgwl_drop(&vbus_lock); +} + +static int init_for_xceiv(void) +{ + int rv; + + if (!otgwl_xceiv) { + otgwl_xceiv = otg_get_transceiver(); + + if (!otgwl_xceiv) { + pr_err("%s: No OTG transceiver found\n", __func__); + return -ENODEV; + } + + snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s", + dev_name(otgwl_xceiv->dev)); + wake_lock_init(&vbus_lock.wakelock, WAKE_LOCK_SUSPEND, + vbus_lock.name); + + rv = otg_register_notifier(otgwl_xceiv, &otgwl_nb); + + if (rv) { + pr_err("%s: otg_register_notifier on transceiver %s" + " failed\n", __func__, + dev_name(otgwl_xceiv->dev)); + otgwl_xceiv = NULL; + wake_lock_destroy(&vbus_lock.wakelock); + return rv; + } + } + + return 0; +} + +static int set_enabled(const char *val, const struct kernel_param *kp) +{ + unsigned long irqflags; + int rv = param_set_bool(val, kp); + + if (rv) + return rv; + + rv = init_for_xceiv(); + + if (rv) + return rv; + + spin_lock_irqsave(&otgwl_spinlock, irqflags); + + if (enabled) + sync_with_xceiv_state(); + else + otgwl_drop(&vbus_lock); + + spin_unlock_irqrestore(&otgwl_spinlock, irqflags); + return 0; +} + +static struct kernel_param_ops enabled_param_ops = { + .set = set_enabled, + .get = param_get_bool, +}; + +module_param_cb(enabled, &enabled_param_ops, &enabled, 0644); +MODULE_PARM_DESC(enabled, "enable wakelock when VBUS present"); + +static int __init otg_wakelock_init(void) +{ + unsigned long irqflags; + + otgwl_nb.notifier_call = otgwl_otg_notifications; + + if (!init_for_xceiv()) { + spin_lock_irqsave(&otgwl_spinlock, irqflags); + + if (enabled) + sync_with_xceiv_state(); + + spin_unlock_irqrestore(&otgwl_spinlock, irqflags); + } else { + enabled = false; + } + + return 0; +} + +late_initcall(otg_wakelock_init); -- GitLab From 2c7e32437ed7d2d7d7d6544f9d7be3fcb1d7b268 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Mon, 26 Sep 2011 20:35:30 -0700 Subject: [PATCH 0688/1442] ANDROID: usb: otg: Temporarily grab wakelock on charger and disconnect events Change-Id: If995d4af4adcb08e8369009483f2956ad9627267 Signed-off-by: Todd Poynor --- drivers/usb/phy/otg-wakelock.c | 133 ++++++++++++++------------------- 1 file changed, 56 insertions(+), 77 deletions(-) diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c index 993162674836..2f11472dd2b3 100644 --- a/drivers/usb/phy/otg-wakelock.c +++ b/drivers/usb/phy/otg-wakelock.c @@ -21,13 +21,15 @@ #include #include +#define TEMPORARY_HOLD_TIME 2000 + static bool enabled = true; static struct otg_transceiver *otgwl_xceiv; static struct notifier_block otgwl_nb; /* * otgwl_spinlock is held while the VBUS lock is grabbed or dropped and the - * locked field is updated to match. + * held field is updated to match. */ static DEFINE_SPINLOCK(otgwl_spinlock); @@ -39,51 +41,62 @@ static DEFINE_SPINLOCK(otgwl_spinlock); struct otgwl_lock { char name[40]; struct wake_lock wakelock; - bool locked; + bool held; }; /* - * VBUS present lock. + * VBUS present lock. Also used as a timed lock on charger + * connect/disconnect and USB host disconnect, to allow the system + * to react to the change in power. */ static struct otgwl_lock vbus_lock; -static void otgwl_grab(struct otgwl_lock *lock) +static void otgwl_hold(struct otgwl_lock *lock) { - if (!lock->locked) { + if (!lock->held) { wake_lock(&lock->wakelock); - lock->locked = true; + lock->held = true; } } +static void otgwl_temporary_hold(struct otgwl_lock *lock) +{ + wake_lock_timeout(&lock->wakelock, + msecs_to_jiffies(TEMPORARY_HOLD_TIME)); + lock->held = false; +} + static void otgwl_drop(struct otgwl_lock *lock) { - if (lock->locked) { + if (lock->held) { wake_unlock(&lock->wakelock); - lock->locked = false; + lock->held = false; } } -static int otgwl_otg_notifications(struct notifier_block *nb, - unsigned long event, void *unused) +static void otgwl_handle_event(unsigned long event) { unsigned long irqflags; - if (!enabled) - return NOTIFY_OK; - spin_lock_irqsave(&otgwl_spinlock, irqflags); + if (!enabled) { + otgwl_drop(&vbus_lock); + spin_unlock_irqrestore(&otgwl_spinlock, irqflags); + return; + } + switch (event) { case USB_EVENT_VBUS: case USB_EVENT_ENUMERATED: - otgwl_grab(&vbus_lock); + otgwl_hold(&vbus_lock); break; case USB_EVENT_NONE: case USB_EVENT_ID: case USB_EVENT_CHARGER: - otgwl_drop(&vbus_lock); + otgwl_temporary_hold(&vbus_lock); break; default: @@ -91,71 +104,25 @@ static int otgwl_otg_notifications(struct notifier_block *nb, } spin_unlock_irqrestore(&otgwl_spinlock, irqflags); - return NOTIFY_OK; -} - -static void sync_with_xceiv_state(void) -{ - if ((otgwl_xceiv->last_event == USB_EVENT_VBUS) || - (otgwl_xceiv->last_event == USB_EVENT_ENUMERATED)) - otgwl_grab(&vbus_lock); - else - otgwl_drop(&vbus_lock); } -static int init_for_xceiv(void) +static int otgwl_otg_notifications(struct notifier_block *nb, + unsigned long event, void *unused) { - int rv; - - if (!otgwl_xceiv) { - otgwl_xceiv = otg_get_transceiver(); - - if (!otgwl_xceiv) { - pr_err("%s: No OTG transceiver found\n", __func__); - return -ENODEV; - } - - snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s", - dev_name(otgwl_xceiv->dev)); - wake_lock_init(&vbus_lock.wakelock, WAKE_LOCK_SUSPEND, - vbus_lock.name); - - rv = otg_register_notifier(otgwl_xceiv, &otgwl_nb); - - if (rv) { - pr_err("%s: otg_register_notifier on transceiver %s" - " failed\n", __func__, - dev_name(otgwl_xceiv->dev)); - otgwl_xceiv = NULL; - wake_lock_destroy(&vbus_lock.wakelock); - return rv; - } - } - - return 0; + otgwl_handle_event(event); + return NOTIFY_OK; } static int set_enabled(const char *val, const struct kernel_param *kp) { - unsigned long irqflags; int rv = param_set_bool(val, kp); if (rv) return rv; - rv = init_for_xceiv(); - - if (rv) - return rv; - - spin_lock_irqsave(&otgwl_spinlock, irqflags); - - if (enabled) - sync_with_xceiv_state(); - else - otgwl_drop(&vbus_lock); + if (otgwl_xceiv) + otgwl_handle_event(otgwl_xceiv->last_event); - spin_unlock_irqrestore(&otgwl_spinlock, irqflags); return 0; } @@ -169,22 +136,34 @@ MODULE_PARM_DESC(enabled, "enable wakelock when VBUS present"); static int __init otg_wakelock_init(void) { - unsigned long irqflags; + int ret; - otgwl_nb.notifier_call = otgwl_otg_notifications; + otgwl_xceiv = otg_get_transceiver(); - if (!init_for_xceiv()) { - spin_lock_irqsave(&otgwl_spinlock, irqflags); + if (!otgwl_xceiv) { + pr_err("%s: No OTG transceiver found\n", __func__); + return -ENODEV; + } - if (enabled) - sync_with_xceiv_state(); + snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s", + dev_name(otgwl_xceiv->dev)); + wake_lock_init(&vbus_lock.wakelock, WAKE_LOCK_SUSPEND, + vbus_lock.name); - spin_unlock_irqrestore(&otgwl_spinlock, irqflags); - } else { - enabled = false; + otgwl_nb.notifier_call = otgwl_otg_notifications; + ret = otg_register_notifier(otgwl_xceiv, &otgwl_nb); + + if (ret) { + pr_err("%s: otg_register_notifier on transceiver %s" + " failed\n", __func__, + dev_name(otgwl_xceiv->dev)); + otgwl_xceiv = NULL; + wake_lock_destroy(&vbus_lock.wakelock); + return ret; } - return 0; + otgwl_handle_event(otgwl_xceiv->last_event); + return ret; } late_initcall(otg_wakelock_init); -- GitLab From 666167cc54cd9005df15e7641e9adb7245daad97 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 1 Feb 2012 14:23:15 -0800 Subject: [PATCH 0689/1442] ANDROID: usb: otg: otg-wakelock: fix build for 3.3 Add missing module.h include Change-Id: Ib0538ca569c9e0713ceefcd1f91c6bc089d2f2ba Signed-off-by: Colin Cross --- drivers/usb/phy/otg-wakelock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c index 2f11472dd2b3..ffd8d8aa5dc8 100644 --- a/drivers/usb/phy/otg-wakelock.c +++ b/drivers/usb/phy/otg-wakelock.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include -- GitLab From 7a6d39e78fdac89c6d2cde8faef4be6c4d881330 Mon Sep 17 00:00:00 2001 From: Benoit Goby Date: Mon, 19 Dec 2011 14:37:50 -0800 Subject: [PATCH 0690/1442] ANDROID: usb: gadget: mtp: Add MTP/PTP function USB gadget function driver used by the Android framework to implement the MTP and PTP protocols. It creates a character device that provides an interface for fast transfer of files and supports transferring files greater than 4GB. Change-Id: I2d8f2c37029fb37d8deb791d04eb7346f94f5adb Signed-off-by: Mike Lockwood --- drivers/usb/gadget/f_mtp.c | 1283 ++++++++++++++++++++++++++++++++++++ include/linux/usb/f_mtp.h | 75 +++ 2 files changed, 1358 insertions(+) create mode 100644 drivers/usb/gadget/f_mtp.c create mode 100644 include/linux/usb/f_mtp.h diff --git a/drivers/usb/gadget/f_mtp.c b/drivers/usb/gadget/f_mtp.c new file mode 100644 index 000000000000..1638977a5410 --- /dev/null +++ b/drivers/usb/gadget/f_mtp.c @@ -0,0 +1,1283 @@ +/* + * Gadget Function Driver for MTP + * + * Copyright (C) 2010 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* #define DEBUG */ +/* #define VERBOSE_DEBUG */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#define MTP_BULK_BUFFER_SIZE 16384 +#define INTR_BUFFER_SIZE 28 + +/* String IDs */ +#define INTERFACE_STRING_INDEX 0 + +/* values for mtp_dev.state */ +#define STATE_OFFLINE 0 /* initial state, disconnected */ +#define STATE_READY 1 /* ready for userspace calls */ +#define STATE_BUSY 2 /* processing userspace calls */ +#define STATE_CANCELED 3 /* transaction canceled by host */ +#define STATE_ERROR 4 /* error from completion routine */ + +/* number of tx and rx requests to allocate */ +#define TX_REQ_MAX 4 +#define RX_REQ_MAX 2 +#define INTR_REQ_MAX 5 + +/* ID for Microsoft MTP OS String */ +#define MTP_OS_STRING_ID 0xEE + +/* MTP class reqeusts */ +#define MTP_REQ_CANCEL 0x64 +#define MTP_REQ_GET_EXT_EVENT_DATA 0x65 +#define MTP_REQ_RESET 0x66 +#define MTP_REQ_GET_DEVICE_STATUS 0x67 + +/* constants for device status */ +#define MTP_RESPONSE_OK 0x2001 +#define MTP_RESPONSE_DEVICE_BUSY 0x2019 + +static const char mtp_shortname[] = "mtp_usb"; + +struct mtp_dev { + struct usb_function function; + struct usb_composite_dev *cdev; + spinlock_t lock; + + struct usb_ep *ep_in; + struct usb_ep *ep_out; + struct usb_ep *ep_intr; + + int state; + + /* synchronize access to our device file */ + atomic_t open_excl; + /* to enforce only one ioctl at a time */ + atomic_t ioctl_excl; + + struct list_head tx_idle; + struct list_head intr_idle; + + wait_queue_head_t read_wq; + wait_queue_head_t write_wq; + wait_queue_head_t intr_wq; + struct usb_request *rx_req[RX_REQ_MAX]; + int rx_done; + + /* for processing MTP_SEND_FILE, MTP_RECEIVE_FILE and + * MTP_SEND_FILE_WITH_HEADER ioctls on a work queue + */ + struct workqueue_struct *wq; + struct work_struct send_file_work; + struct work_struct receive_file_work; + struct file *xfer_file; + loff_t xfer_file_offset; + int64_t xfer_file_length; + unsigned xfer_send_header; + uint16_t xfer_command; + uint32_t xfer_transaction_id; + int xfer_result; +}; + +static struct usb_interface_descriptor mtp_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bInterfaceNumber = 0, + .bNumEndpoints = 3, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = USB_SUBCLASS_VENDOR_SPEC, + .bInterfaceProtocol = 0, +}; + +static struct usb_interface_descriptor ptp_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bInterfaceNumber = 0, + .bNumEndpoints = 3, + .bInterfaceClass = USB_CLASS_STILL_IMAGE, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 1, +}; + +static struct usb_endpoint_descriptor mtp_highspeed_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(512), +}; + +static struct usb_endpoint_descriptor mtp_highspeed_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(512), +}; + +static struct usb_endpoint_descriptor mtp_fullspeed_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +static struct usb_endpoint_descriptor mtp_fullspeed_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +static struct usb_endpoint_descriptor mtp_intr_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_INT, + .wMaxPacketSize = __constant_cpu_to_le16(INTR_BUFFER_SIZE), + .bInterval = 6, +}; + +static struct usb_descriptor_header *fs_mtp_descs[] = { + (struct usb_descriptor_header *) &mtp_interface_desc, + (struct usb_descriptor_header *) &mtp_fullspeed_in_desc, + (struct usb_descriptor_header *) &mtp_fullspeed_out_desc, + (struct usb_descriptor_header *) &mtp_intr_desc, + NULL, +}; + +static struct usb_descriptor_header *hs_mtp_descs[] = { + (struct usb_descriptor_header *) &mtp_interface_desc, + (struct usb_descriptor_header *) &mtp_highspeed_in_desc, + (struct usb_descriptor_header *) &mtp_highspeed_out_desc, + (struct usb_descriptor_header *) &mtp_intr_desc, + NULL, +}; + +static struct usb_descriptor_header *fs_ptp_descs[] = { + (struct usb_descriptor_header *) &ptp_interface_desc, + (struct usb_descriptor_header *) &mtp_fullspeed_in_desc, + (struct usb_descriptor_header *) &mtp_fullspeed_out_desc, + (struct usb_descriptor_header *) &mtp_intr_desc, + NULL, +}; + +static struct usb_descriptor_header *hs_ptp_descs[] = { + (struct usb_descriptor_header *) &ptp_interface_desc, + (struct usb_descriptor_header *) &mtp_highspeed_in_desc, + (struct usb_descriptor_header *) &mtp_highspeed_out_desc, + (struct usb_descriptor_header *) &mtp_intr_desc, + NULL, +}; + +static struct usb_string mtp_string_defs[] = { + /* Naming interface "MTP" so libmtp will recognize us */ + [INTERFACE_STRING_INDEX].s = "MTP", + { }, /* end of list */ +}; + +static struct usb_gadget_strings mtp_string_table = { + .language = 0x0409, /* en-US */ + .strings = mtp_string_defs, +}; + +static struct usb_gadget_strings *mtp_strings[] = { + &mtp_string_table, + NULL, +}; + +/* Microsoft MTP OS String */ +static u8 mtp_os_string[] = { + 18, /* sizeof(mtp_os_string) */ + USB_DT_STRING, + /* Signature field: "MSFT100" */ + 'M', 0, 'S', 0, 'F', 0, 'T', 0, '1', 0, '0', 0, '0', 0, + /* vendor code */ + 1, + /* padding */ + 0 +}; + +/* Microsoft Extended Configuration Descriptor Header Section */ +struct mtp_ext_config_desc_header { + __le32 dwLength; + __u16 bcdVersion; + __le16 wIndex; + __u8 bCount; + __u8 reserved[7]; +}; + +/* Microsoft Extended Configuration Descriptor Function Section */ +struct mtp_ext_config_desc_function { + __u8 bFirstInterfaceNumber; + __u8 bInterfaceCount; + __u8 compatibleID[8]; + __u8 subCompatibleID[8]; + __u8 reserved[6]; +}; + +/* MTP Extended Configuration Descriptor */ +struct { + struct mtp_ext_config_desc_header header; + struct mtp_ext_config_desc_function function; +} mtp_ext_config_desc = { + .header = { + .dwLength = __constant_cpu_to_le32(sizeof(mtp_ext_config_desc)), + .bcdVersion = __constant_cpu_to_le16(0x0100), + .wIndex = __constant_cpu_to_le16(4), + .bCount = __constant_cpu_to_le16(1), + }, + .function = { + .bFirstInterfaceNumber = 0, + .bInterfaceCount = 1, + .compatibleID = { 'M', 'T', 'P' }, + }, +}; + +struct mtp_device_status { + __le16 wLength; + __le16 wCode; +}; + +/* temporary variable used between mtp_open() and mtp_gadget_bind() */ +static struct mtp_dev *_mtp_dev; + +static inline struct mtp_dev *func_to_mtp(struct usb_function *f) +{ + return container_of(f, struct mtp_dev, function); +} + +static struct usb_request *mtp_request_new(struct usb_ep *ep, int buffer_size) +{ + struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) + return NULL; + + /* now allocate buffers for the requests */ + req->buf = kmalloc(buffer_size, GFP_KERNEL); + if (!req->buf) { + usb_ep_free_request(ep, req); + return NULL; + } + + return req; +} + +static void mtp_request_free(struct usb_request *req, struct usb_ep *ep) +{ + if (req) { + kfree(req->buf); + usb_ep_free_request(ep, req); + } +} + +static inline int mtp_lock(atomic_t *excl) +{ + if (atomic_inc_return(excl) == 1) { + return 0; + } else { + atomic_dec(excl); + return -1; + } +} + +static inline void mtp_unlock(atomic_t *excl) +{ + atomic_dec(excl); +} + +/* add a request to the tail of a list */ +static void mtp_req_put(struct mtp_dev *dev, struct list_head *head, + struct usb_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + list_add_tail(&req->list, head); + spin_unlock_irqrestore(&dev->lock, flags); +} + +/* remove a request from the head of a list */ +static struct usb_request +*mtp_req_get(struct mtp_dev *dev, struct list_head *head) +{ + unsigned long flags; + struct usb_request *req; + + spin_lock_irqsave(&dev->lock, flags); + if (list_empty(head)) { + req = 0; + } else { + req = list_first_entry(head, struct usb_request, list); + list_del(&req->list); + } + spin_unlock_irqrestore(&dev->lock, flags); + return req; +} + +static void mtp_complete_in(struct usb_ep *ep, struct usb_request *req) +{ + struct mtp_dev *dev = _mtp_dev; + + if (req->status != 0) + dev->state = STATE_ERROR; + + mtp_req_put(dev, &dev->tx_idle, req); + + wake_up(&dev->write_wq); +} + +static void mtp_complete_out(struct usb_ep *ep, struct usb_request *req) +{ + struct mtp_dev *dev = _mtp_dev; + + dev->rx_done = 1; + if (req->status != 0) + dev->state = STATE_ERROR; + + wake_up(&dev->read_wq); +} + +static void mtp_complete_intr(struct usb_ep *ep, struct usb_request *req) +{ + struct mtp_dev *dev = _mtp_dev; + + if (req->status != 0) + dev->state = STATE_ERROR; + + mtp_req_put(dev, &dev->intr_idle, req); + + wake_up(&dev->intr_wq); +} + +static int mtp_create_bulk_endpoints(struct mtp_dev *dev, + struct usb_endpoint_descriptor *in_desc, + struct usb_endpoint_descriptor *out_desc, + struct usb_endpoint_descriptor *intr_desc) +{ + struct usb_composite_dev *cdev = dev->cdev; + struct usb_request *req; + struct usb_ep *ep; + int i; + + DBG(cdev, "create_bulk_endpoints dev: %p\n", dev); + + ep = usb_ep_autoconfig(cdev->gadget, in_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_in failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_in = ep; + + ep = usb_ep_autoconfig(cdev->gadget, out_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for mtp ep_out got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_out = ep; + + ep = usb_ep_autoconfig(cdev->gadget, out_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for mtp ep_out got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_out = ep; + + ep = usb_ep_autoconfig(cdev->gadget, intr_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_intr failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for mtp ep_intr got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_intr = ep; + + /* now allocate requests for our endpoints */ + for (i = 0; i < TX_REQ_MAX; i++) { + req = mtp_request_new(dev->ep_in, MTP_BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = mtp_complete_in; + mtp_req_put(dev, &dev->tx_idle, req); + } + for (i = 0; i < RX_REQ_MAX; i++) { + req = mtp_request_new(dev->ep_out, MTP_BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = mtp_complete_out; + dev->rx_req[i] = req; + } + for (i = 0; i < INTR_REQ_MAX; i++) { + req = mtp_request_new(dev->ep_intr, INTR_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = mtp_complete_intr; + mtp_req_put(dev, &dev->intr_idle, req); + } + + return 0; + +fail: + printk(KERN_ERR "mtp_bind() could not allocate requests\n"); + return -1; +} + +static ssize_t mtp_read(struct file *fp, char __user *buf, + size_t count, loff_t *pos) +{ + struct mtp_dev *dev = fp->private_data; + struct usb_composite_dev *cdev = dev->cdev; + struct usb_request *req; + int r = count, xfer; + int ret = 0; + + DBG(cdev, "mtp_read(%d)\n", count); + + if (count > MTP_BULK_BUFFER_SIZE) + return -EINVAL; + + /* we will block until we're online */ + DBG(cdev, "mtp_read: waiting for online state\n"); + ret = wait_event_interruptible(dev->read_wq, + dev->state != STATE_OFFLINE); + if (ret < 0) { + r = ret; + goto done; + } + spin_lock_irq(&dev->lock); + if (dev->state == STATE_CANCELED) { + /* report cancelation to userspace */ + dev->state = STATE_READY; + spin_unlock_irq(&dev->lock); + return -ECANCELED; + } + dev->state = STATE_BUSY; + spin_unlock_irq(&dev->lock); + +requeue_req: + /* queue a request */ + req = dev->rx_req[0]; + req->length = count; + dev->rx_done = 0; + ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL); + if (ret < 0) { + r = -EIO; + goto done; + } else { + DBG(cdev, "rx %p queue\n", req); + } + + /* wait for a request to complete */ + ret = wait_event_interruptible(dev->read_wq, dev->rx_done); + if (ret < 0) { + r = ret; + usb_ep_dequeue(dev->ep_out, req); + goto done; + } + if (dev->state == STATE_BUSY) { + /* If we got a 0-len packet, throw it back and try again. */ + if (req->actual == 0) + goto requeue_req; + + DBG(cdev, "rx %p %d\n", req, req->actual); + xfer = (req->actual < count) ? req->actual : count; + r = xfer; + if (copy_to_user(buf, req->buf, xfer)) + r = -EFAULT; + } else + r = -EIO; + +done: + spin_lock_irq(&dev->lock); + if (dev->state == STATE_CANCELED) + r = -ECANCELED; + else if (dev->state != STATE_OFFLINE) + dev->state = STATE_READY; + spin_unlock_irq(&dev->lock); + + DBG(cdev, "mtp_read returning %d\n", r); + return r; +} + +static ssize_t mtp_write(struct file *fp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mtp_dev *dev = fp->private_data; + struct usb_composite_dev *cdev = dev->cdev; + struct usb_request *req = 0; + int r = count, xfer; + int sendZLP = 0; + int ret; + + DBG(cdev, "mtp_write(%d)\n", count); + + spin_lock_irq(&dev->lock); + if (dev->state == STATE_CANCELED) { + /* report cancelation to userspace */ + dev->state = STATE_READY; + spin_unlock_irq(&dev->lock); + return -ECANCELED; + } + if (dev->state == STATE_OFFLINE) { + spin_unlock_irq(&dev->lock); + return -ENODEV; + } + dev->state = STATE_BUSY; + spin_unlock_irq(&dev->lock); + + /* we need to send a zero length packet to signal the end of transfer + * if the transfer size is aligned to a packet boundary. + */ + if ((count & (dev->ep_in->maxpacket - 1)) == 0) + sendZLP = 1; + + while (count > 0 || sendZLP) { + /* so we exit after sending ZLP */ + if (count == 0) + sendZLP = 0; + + if (dev->state != STATE_BUSY) { + DBG(cdev, "mtp_write dev->error\n"); + r = -EIO; + break; + } + + /* get an idle tx request to use */ + req = 0; + ret = wait_event_interruptible(dev->write_wq, + ((req = mtp_req_get(dev, &dev->tx_idle)) + || dev->state != STATE_BUSY)); + if (!req) { + r = ret; + break; + } + + if (count > MTP_BULK_BUFFER_SIZE) + xfer = MTP_BULK_BUFFER_SIZE; + else + xfer = count; + if (xfer && copy_from_user(req->buf, buf, xfer)) { + r = -EFAULT; + break; + } + + req->length = xfer; + ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL); + if (ret < 0) { + DBG(cdev, "mtp_write: xfer error %d\n", ret); + r = -EIO; + break; + } + + buf += xfer; + count -= xfer; + + /* zero this so we don't try to free it on error exit */ + req = 0; + } + + if (req) + mtp_req_put(dev, &dev->tx_idle, req); + + spin_lock_irq(&dev->lock); + if (dev->state == STATE_CANCELED) + r = -ECANCELED; + else if (dev->state != STATE_OFFLINE) + dev->state = STATE_READY; + spin_unlock_irq(&dev->lock); + + DBG(cdev, "mtp_write returning %d\n", r); + return r; +} + +/* read from a local file and write to USB */ +static void send_file_work(struct work_struct *data) +{ + struct mtp_dev *dev = container_of(data, struct mtp_dev, + send_file_work); + struct usb_composite_dev *cdev = dev->cdev; + struct usb_request *req = 0; + struct mtp_data_header *header; + struct file *filp; + loff_t offset; + int64_t count; + int xfer, ret, hdr_size; + int r = 0; + int sendZLP = 0; + + /* read our parameters */ + smp_rmb(); + filp = dev->xfer_file; + offset = dev->xfer_file_offset; + count = dev->xfer_file_length; + + DBG(cdev, "send_file_work(%lld %lld)\n", offset, count); + + if (dev->xfer_send_header) { + hdr_size = sizeof(struct mtp_data_header); + count += hdr_size; + } else { + hdr_size = 0; + } + + /* we need to send a zero length packet to signal the end of transfer + * if the transfer size is aligned to a packet boundary. + */ + if ((count & (dev->ep_in->maxpacket - 1)) == 0) + sendZLP = 1; + + while (count > 0 || sendZLP) { + /* so we exit after sending ZLP */ + if (count == 0) + sendZLP = 0; + + /* get an idle tx request to use */ + req = 0; + ret = wait_event_interruptible(dev->write_wq, + (req = mtp_req_get(dev, &dev->tx_idle)) + || dev->state != STATE_BUSY); + if (dev->state == STATE_CANCELED) { + r = -ECANCELED; + break; + } + if (!req) { + r = ret; + break; + } + + if (count > MTP_BULK_BUFFER_SIZE) + xfer = MTP_BULK_BUFFER_SIZE; + else + xfer = count; + + if (hdr_size) { + /* prepend MTP data header */ + header = (struct mtp_data_header *)req->buf; + header->length = __cpu_to_le32(count); + header->type = __cpu_to_le16(2); /* data packet */ + header->command = __cpu_to_le16(dev->xfer_command); + header->transaction_id = + __cpu_to_le32(dev->xfer_transaction_id); + } + + ret = vfs_read(filp, req->buf + hdr_size, xfer - hdr_size, + &offset); + if (ret < 0) { + r = ret; + break; + } + xfer = ret + hdr_size; + hdr_size = 0; + + req->length = xfer; + ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL); + if (ret < 0) { + DBG(cdev, "send_file_work: xfer error %d\n", ret); + dev->state = STATE_ERROR; + r = -EIO; + break; + } + + count -= xfer; + + /* zero this so we don't try to free it on error exit */ + req = 0; + } + + if (req) + mtp_req_put(dev, &dev->tx_idle, req); + + DBG(cdev, "send_file_work returning %d\n", r); + /* write the result */ + dev->xfer_result = r; + smp_wmb(); +} + +/* read from USB and write to a local file */ +static void receive_file_work(struct work_struct *data) +{ + struct mtp_dev *dev = container_of(data, struct mtp_dev, + receive_file_work); + struct usb_composite_dev *cdev = dev->cdev; + struct usb_request *read_req = NULL, *write_req = NULL; + struct file *filp; + loff_t offset; + int64_t count; + int ret, cur_buf = 0; + int r = 0; + + /* read our parameters */ + smp_rmb(); + filp = dev->xfer_file; + offset = dev->xfer_file_offset; + count = dev->xfer_file_length; + + DBG(cdev, "receive_file_work(%lld)\n", count); + + while (count > 0 || write_req) { + if (count > 0) { + /* queue a request */ + read_req = dev->rx_req[cur_buf]; + cur_buf = (cur_buf + 1) % RX_REQ_MAX; + + read_req->length = (count > MTP_BULK_BUFFER_SIZE + ? MTP_BULK_BUFFER_SIZE : count); + dev->rx_done = 0; + ret = usb_ep_queue(dev->ep_out, read_req, GFP_KERNEL); + if (ret < 0) { + r = -EIO; + dev->state = STATE_ERROR; + break; + } + } + + if (write_req) { + DBG(cdev, "rx %p %d\n", write_req, write_req->actual); + ret = vfs_write(filp, write_req->buf, write_req->actual, + &offset); + DBG(cdev, "vfs_write %d\n", ret); + if (ret != write_req->actual) { + r = -EIO; + dev->state = STATE_ERROR; + break; + } + write_req = NULL; + } + + if (read_req) { + /* wait for our last read to complete */ + ret = wait_event_interruptible(dev->read_wq, + dev->rx_done || dev->state != STATE_BUSY); + if (dev->state == STATE_CANCELED) { + r = -ECANCELED; + if (!dev->rx_done) + usb_ep_dequeue(dev->ep_out, read_req); + break; + } + /* if xfer_file_length is 0xFFFFFFFF, then we read until + * we get a zero length packet + */ + if (count != 0xFFFFFFFF) + count -= read_req->actual; + if (read_req->actual < read_req->length) { + /* + * short packet is used to signal EOF for + * sizes > 4 gig + */ + DBG(cdev, "got short packet\n"); + count = 0; + } + + write_req = read_req; + read_req = NULL; + } + } + + DBG(cdev, "receive_file_work returning %d\n", r); + /* write the result */ + dev->xfer_result = r; + smp_wmb(); +} + +static int mtp_send_event(struct mtp_dev *dev, struct mtp_event *event) +{ + struct usb_request *req = NULL; + int ret; + int length = event->length; + + DBG(dev->cdev, "mtp_send_event(%d)\n", event->length); + + if (length < 0 || length > INTR_BUFFER_SIZE) + return -EINVAL; + if (dev->state == STATE_OFFLINE) + return -ENODEV; + + ret = wait_event_interruptible_timeout(dev->intr_wq, + (req = mtp_req_get(dev, &dev->intr_idle)), + msecs_to_jiffies(1000)); + if (!req) + return -ETIME; + + if (copy_from_user(req->buf, (void __user *)event->data, length)) { + mtp_req_put(dev, &dev->intr_idle, req); + return -EFAULT; + } + req->length = length; + ret = usb_ep_queue(dev->ep_intr, req, GFP_KERNEL); + if (ret) + mtp_req_put(dev, &dev->intr_idle, req); + + return ret; +} + +static long mtp_ioctl(struct file *fp, unsigned code, unsigned long value) +{ + struct mtp_dev *dev = fp->private_data; + struct file *filp = NULL; + int ret = -EINVAL; + + if (mtp_lock(&dev->ioctl_excl)) + return -EBUSY; + + switch (code) { + case MTP_SEND_FILE: + case MTP_RECEIVE_FILE: + case MTP_SEND_FILE_WITH_HEADER: + { + struct mtp_file_range mfr; + struct work_struct *work; + + spin_lock_irq(&dev->lock); + if (dev->state == STATE_CANCELED) { + /* report cancelation to userspace */ + dev->state = STATE_READY; + spin_unlock_irq(&dev->lock); + ret = -ECANCELED; + goto out; + } + if (dev->state == STATE_OFFLINE) { + spin_unlock_irq(&dev->lock); + ret = -ENODEV; + goto out; + } + dev->state = STATE_BUSY; + spin_unlock_irq(&dev->lock); + + if (copy_from_user(&mfr, (void __user *)value, sizeof(mfr))) { + ret = -EFAULT; + goto fail; + } + /* hold a reference to the file while we are working with it */ + filp = fget(mfr.fd); + if (!filp) { + ret = -EBADF; + goto fail; + } + + /* write the parameters */ + dev->xfer_file = filp; + dev->xfer_file_offset = mfr.offset; + dev->xfer_file_length = mfr.length; + smp_wmb(); + + if (code == MTP_SEND_FILE_WITH_HEADER) { + work = &dev->send_file_work; + dev->xfer_send_header = 1; + dev->xfer_command = mfr.command; + dev->xfer_transaction_id = mfr.transaction_id; + } else if (code == MTP_SEND_FILE) { + work = &dev->send_file_work; + dev->xfer_send_header = 0; + } else { + work = &dev->receive_file_work; + } + + /* We do the file transfer on a work queue so it will run + * in kernel context, which is necessary for vfs_read and + * vfs_write to use our buffers in the kernel address space. + */ + queue_work(dev->wq, work); + /* wait for operation to complete */ + flush_workqueue(dev->wq); + fput(filp); + + /* read the result */ + smp_rmb(); + ret = dev->xfer_result; + break; + } + case MTP_SEND_EVENT: + { + struct mtp_event event; + /* return here so we don't change dev->state below, + * which would interfere with bulk transfer state. + */ + if (copy_from_user(&event, (void __user *)value, sizeof(event))) + ret = -EFAULT; + else + ret = mtp_send_event(dev, &event); + goto out; + } + } + +fail: + spin_lock_irq(&dev->lock); + if (dev->state == STATE_CANCELED) + ret = -ECANCELED; + else if (dev->state != STATE_OFFLINE) + dev->state = STATE_READY; + spin_unlock_irq(&dev->lock); +out: + mtp_unlock(&dev->ioctl_excl); + DBG(dev->cdev, "ioctl returning %d\n", ret); + return ret; +} + +static int mtp_open(struct inode *ip, struct file *fp) +{ + printk(KERN_INFO "mtp_open\n"); + if (mtp_lock(&_mtp_dev->open_excl)) + return -EBUSY; + + /* clear any error condition */ + if (_mtp_dev->state != STATE_OFFLINE) + _mtp_dev->state = STATE_READY; + + fp->private_data = _mtp_dev; + return 0; +} + +static int mtp_release(struct inode *ip, struct file *fp) +{ + printk(KERN_INFO "mtp_release\n"); + + mtp_unlock(&_mtp_dev->open_excl); + return 0; +} + +/* file operations for /dev/mtp_usb */ +static const struct file_operations mtp_fops = { + .owner = THIS_MODULE, + .read = mtp_read, + .write = mtp_write, + .unlocked_ioctl = mtp_ioctl, + .open = mtp_open, + .release = mtp_release, +}; + +static struct miscdevice mtp_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = mtp_shortname, + .fops = &mtp_fops, +}; + +static int mtp_ctrlrequest(struct usb_composite_dev *cdev, + const struct usb_ctrlrequest *ctrl) +{ + struct mtp_dev *dev = _mtp_dev; + int value = -EOPNOTSUPP; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_value = le16_to_cpu(ctrl->wValue); + u16 w_length = le16_to_cpu(ctrl->wLength); + unsigned long flags; + + VDBG(cdev, "mtp_ctrlrequest " + "%02x.%02x v%04x i%04x l%u\n", + ctrl->bRequestType, ctrl->bRequest, + w_value, w_index, w_length); + + /* Handle MTP OS string */ + if (ctrl->bRequestType == + (USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE) + && ctrl->bRequest == USB_REQ_GET_DESCRIPTOR + && (w_value >> 8) == USB_DT_STRING + && (w_value & 0xFF) == MTP_OS_STRING_ID) { + value = (w_length < sizeof(mtp_os_string) + ? w_length : sizeof(mtp_os_string)); + memcpy(cdev->req->buf, mtp_os_string, value); + } else if ((ctrl->bRequestType & USB_TYPE_MASK) == USB_TYPE_VENDOR) { + /* Handle MTP OS descriptor */ + DBG(cdev, "vendor request: %d index: %d value: %d length: %d\n", + ctrl->bRequest, w_index, w_value, w_length); + + if (ctrl->bRequest == 1 + && (ctrl->bRequestType & USB_DIR_IN) + && (w_index == 4 || w_index == 5)) { + value = (w_length < sizeof(mtp_ext_config_desc) ? + w_length : sizeof(mtp_ext_config_desc)); + memcpy(cdev->req->buf, &mtp_ext_config_desc, value); + } + } else if ((ctrl->bRequestType & USB_TYPE_MASK) == USB_TYPE_CLASS) { + DBG(cdev, "class request: %d index: %d value: %d length: %d\n", + ctrl->bRequest, w_index, w_value, w_length); + + if (ctrl->bRequest == MTP_REQ_CANCEL && w_index == 0 + && w_value == 0) { + DBG(cdev, "MTP_REQ_CANCEL\n"); + + spin_lock_irqsave(&dev->lock, flags); + if (dev->state == STATE_BUSY) { + dev->state = STATE_CANCELED; + wake_up(&dev->read_wq); + wake_up(&dev->write_wq); + } + spin_unlock_irqrestore(&dev->lock, flags); + + /* We need to queue a request to read the remaining + * bytes, but we don't actually need to look at + * the contents. + */ + value = w_length; + } else if (ctrl->bRequest == MTP_REQ_GET_DEVICE_STATUS + && w_index == 0 && w_value == 0) { + struct mtp_device_status *status = cdev->req->buf; + status->wLength = + __constant_cpu_to_le16(sizeof(*status)); + + DBG(cdev, "MTP_REQ_GET_DEVICE_STATUS\n"); + spin_lock_irqsave(&dev->lock, flags); + /* device status is "busy" until we report + * the cancelation to userspace + */ + if (dev->state == STATE_CANCELED) + status->wCode = + __cpu_to_le16(MTP_RESPONSE_DEVICE_BUSY); + else + status->wCode = + __cpu_to_le16(MTP_RESPONSE_OK); + spin_unlock_irqrestore(&dev->lock, flags); + value = sizeof(*status); + } + } + + /* respond with data transfer or status phase? */ + if (value >= 0) { + int rc; + cdev->req->zero = value < w_length; + cdev->req->length = value; + rc = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC); + if (rc < 0) + ERROR(cdev, "%s: response queue error\n", __func__); + } + return value; +} + +static int +mtp_function_bind(struct usb_configuration *c, struct usb_function *f) +{ + struct usb_composite_dev *cdev = c->cdev; + struct mtp_dev *dev = func_to_mtp(f); + int id; + int ret; + + dev->cdev = cdev; + DBG(cdev, "mtp_function_bind dev: %p\n", dev); + + /* allocate interface ID(s) */ + id = usb_interface_id(c, f); + if (id < 0) + return id; + mtp_interface_desc.bInterfaceNumber = id; + + /* allocate endpoints */ + ret = mtp_create_bulk_endpoints(dev, &mtp_fullspeed_in_desc, + &mtp_fullspeed_out_desc, &mtp_intr_desc); + if (ret) + return ret; + + /* support high speed hardware */ + if (gadget_is_dualspeed(c->cdev->gadget)) { + mtp_highspeed_in_desc.bEndpointAddress = + mtp_fullspeed_in_desc.bEndpointAddress; + mtp_highspeed_out_desc.bEndpointAddress = + mtp_fullspeed_out_desc.bEndpointAddress; + } + + DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n", + gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full", + f->name, dev->ep_in->name, dev->ep_out->name); + return 0; +} + +static void +mtp_function_unbind(struct usb_configuration *c, struct usb_function *f) +{ + struct mtp_dev *dev = func_to_mtp(f); + struct usb_request *req; + int i; + + while ((req = mtp_req_get(dev, &dev->tx_idle))) + mtp_request_free(req, dev->ep_in); + for (i = 0; i < RX_REQ_MAX; i++) + mtp_request_free(dev->rx_req[i], dev->ep_out); + while ((req = mtp_req_get(dev, &dev->intr_idle))) + mtp_request_free(req, dev->ep_intr); + dev->state = STATE_OFFLINE; +} + +static int mtp_function_set_alt(struct usb_function *f, + unsigned intf, unsigned alt) +{ + struct mtp_dev *dev = func_to_mtp(f); + struct usb_composite_dev *cdev = f->config->cdev; + int ret; + + DBG(cdev, "mtp_function_set_alt intf: %d alt: %d\n", intf, alt); + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_in); + if (ret) + return ret; + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_out); + if (ret) { + usb_ep_disable(dev->ep_in); + return ret; + } + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_intr); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_intr); + if (ret) { + usb_ep_disable(dev->ep_out); + usb_ep_disable(dev->ep_in); + return ret; + } + dev->state = STATE_READY; + + /* readers may be blocked waiting for us to go online */ + wake_up(&dev->read_wq); + return 0; +} + +static void mtp_function_disable(struct usb_function *f) +{ + struct mtp_dev *dev = func_to_mtp(f); + struct usb_composite_dev *cdev = dev->cdev; + + DBG(cdev, "mtp_function_disable\n"); + dev->state = STATE_OFFLINE; + usb_ep_disable(dev->ep_in); + usb_ep_disable(dev->ep_out); + usb_ep_disable(dev->ep_intr); + + /* readers may be blocked waiting for us to go online */ + wake_up(&dev->read_wq); + + VDBG(cdev, "%s disabled\n", dev->function.name); +} + +static int mtp_bind_config(struct usb_configuration *c, bool ptp_config) +{ + struct mtp_dev *dev = _mtp_dev; + int ret = 0; + + printk(KERN_INFO "mtp_bind_config\n"); + + /* allocate a string ID for our interface */ + if (mtp_string_defs[INTERFACE_STRING_INDEX].id == 0) { + ret = usb_string_id(c->cdev); + if (ret < 0) + return ret; + mtp_string_defs[INTERFACE_STRING_INDEX].id = ret; + mtp_interface_desc.iInterface = ret; + } + + dev->cdev = c->cdev; + dev->function.name = "mtp"; + dev->function.strings = mtp_strings; + if (ptp_config) { + dev->function.descriptors = fs_ptp_descs; + dev->function.hs_descriptors = hs_ptp_descs; + } else { + dev->function.descriptors = fs_mtp_descs; + dev->function.hs_descriptors = hs_mtp_descs; + } + dev->function.bind = mtp_function_bind; + dev->function.unbind = mtp_function_unbind; + dev->function.set_alt = mtp_function_set_alt; + dev->function.disable = mtp_function_disable; + + return usb_add_function(c, &dev->function); +} + +static int mtp_setup(void) +{ + struct mtp_dev *dev; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + spin_lock_init(&dev->lock); + init_waitqueue_head(&dev->read_wq); + init_waitqueue_head(&dev->write_wq); + init_waitqueue_head(&dev->intr_wq); + atomic_set(&dev->open_excl, 0); + atomic_set(&dev->ioctl_excl, 0); + INIT_LIST_HEAD(&dev->tx_idle); + INIT_LIST_HEAD(&dev->intr_idle); + + dev->wq = create_singlethread_workqueue("f_mtp"); + if (!dev->wq) { + ret = -ENOMEM; + goto err1; + } + INIT_WORK(&dev->send_file_work, send_file_work); + INIT_WORK(&dev->receive_file_work, receive_file_work); + + _mtp_dev = dev; + + ret = misc_register(&mtp_device); + if (ret) + goto err2; + + return 0; + +err2: + destroy_workqueue(dev->wq); +err1: + _mtp_dev = NULL; + kfree(dev); + printk(KERN_ERR "mtp gadget driver failed to initialize\n"); + return ret; +} + +static void mtp_cleanup(void) +{ + struct mtp_dev *dev = _mtp_dev; + + if (!dev) + return; + + misc_deregister(&mtp_device); + destroy_workqueue(dev->wq); + _mtp_dev = NULL; + kfree(dev); +} diff --git a/include/linux/usb/f_mtp.h b/include/linux/usb/f_mtp.h new file mode 100644 index 000000000000..72a432e2fcdd --- /dev/null +++ b/include/linux/usb/f_mtp.h @@ -0,0 +1,75 @@ +/* + * Gadget Function Driver for MTP + * + * Copyright (C) 2010 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __LINUX_USB_F_MTP_H +#define __LINUX_USB_F_MTP_H + +#include + +#ifdef __KERNEL__ + +struct mtp_data_header { + /* length of packet, including this header */ + uint32_t length; + /* container type (2 for data packet) */ + uint16_t type; + /* MTP command code */ + uint16_t command; + /* MTP transaction ID */ + uint32_t transaction_id; +}; + +#endif /* __KERNEL__ */ + +struct mtp_file_range { + /* file descriptor for file to transfer */ + int fd; + /* offset in file for start of transfer */ + loff_t offset; + /* number of bytes to transfer */ + int64_t length; + /* MTP command ID for data header, + * used only for MTP_SEND_FILE_WITH_HEADER + */ + uint16_t command; + /* MTP transaction ID for data header, + * used only for MTP_SEND_FILE_WITH_HEADER + */ + uint32_t transaction_id; +}; + +struct mtp_event { + /* size of the event */ + size_t length; + /* event data to send */ + void *data; +}; + +/* Sends the specified file range to the host */ +#define MTP_SEND_FILE _IOW('M', 0, struct mtp_file_range) +/* Receives data from the host and writes it to a file. + * The file is created if it does not exist. + */ +#define MTP_RECEIVE_FILE _IOW('M', 1, struct mtp_file_range) +/* Sends an event to the host via the interrupt endpoint */ +#define MTP_SEND_EVENT _IOW('M', 3, struct mtp_event) +/* Sends the specified file range to the host, + * with a 12 byte MTP data packet header at the beginning. + */ +#define MTP_SEND_FILE_WITH_HEADER _IOW('M', 4, struct mtp_file_range) + +#endif /* __LINUX_USB_F_MTP_H */ -- GitLab From 7719a890fab191820fea5163eb8ba2b8cee65e26 Mon Sep 17 00:00:00 2001 From: Benoit Goby Date: Mon, 19 Dec 2011 14:38:41 -0800 Subject: [PATCH 0691/1442] ANDROID: usb: gadget: adb: Add ADB function Android Debug Bridge (adb) is a command line tool that lets users communicate with a Android-powered device. It is used mainly to debug applications and tranfer files. f_adb implements the transport layer between the ADB Server (on the host) and the ADBD daemon (on the device). Change-Id: Ib11672fa3439dcb3a6588774b132b5a85e03e8ba Signed-off-by: Mike Lockwood --- drivers/usb/gadget/f_adb.c | 611 +++++++++++++++++++++++++++++++++++++ 1 file changed, 611 insertions(+) create mode 100644 drivers/usb/gadget/f_adb.c diff --git a/drivers/usb/gadget/f_adb.c b/drivers/usb/gadget/f_adb.c new file mode 100644 index 000000000000..5415353ab2c1 --- /dev/null +++ b/drivers/usb/gadget/f_adb.c @@ -0,0 +1,611 @@ +/* + * Gadget Driver for Android ADB + * + * Copyright (C) 2008 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ADB_BULK_BUFFER_SIZE 4096 + +/* number of tx requests to allocate */ +#define TX_REQ_MAX 4 + +static const char adb_shortname[] = "android_adb"; + +struct adb_dev { + struct usb_function function; + struct usb_composite_dev *cdev; + spinlock_t lock; + + struct usb_ep *ep_in; + struct usb_ep *ep_out; + + int online; + int error; + + atomic_t read_excl; + atomic_t write_excl; + atomic_t open_excl; + + struct list_head tx_idle; + + wait_queue_head_t read_wq; + wait_queue_head_t write_wq; + struct usb_request *rx_req; + int rx_done; +}; + +static struct usb_interface_descriptor adb_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bInterfaceNumber = 0, + .bNumEndpoints = 2, + .bInterfaceClass = 0xFF, + .bInterfaceSubClass = 0x42, + .bInterfaceProtocol = 1, +}; + +static struct usb_endpoint_descriptor adb_highspeed_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(512), +}; + +static struct usb_endpoint_descriptor adb_highspeed_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(512), +}; + +static struct usb_endpoint_descriptor adb_fullspeed_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +static struct usb_endpoint_descriptor adb_fullspeed_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +static struct usb_descriptor_header *fs_adb_descs[] = { + (struct usb_descriptor_header *) &adb_interface_desc, + (struct usb_descriptor_header *) &adb_fullspeed_in_desc, + (struct usb_descriptor_header *) &adb_fullspeed_out_desc, + NULL, +}; + +static struct usb_descriptor_header *hs_adb_descs[] = { + (struct usb_descriptor_header *) &adb_interface_desc, + (struct usb_descriptor_header *) &adb_highspeed_in_desc, + (struct usb_descriptor_header *) &adb_highspeed_out_desc, + NULL, +}; + + +/* temporary variable used between adb_open() and adb_gadget_bind() */ +static struct adb_dev *_adb_dev; + +static inline struct adb_dev *func_to_adb(struct usb_function *f) +{ + return container_of(f, struct adb_dev, function); +} + + +static struct usb_request *adb_request_new(struct usb_ep *ep, int buffer_size) +{ + struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) + return NULL; + + /* now allocate buffers for the requests */ + req->buf = kmalloc(buffer_size, GFP_KERNEL); + if (!req->buf) { + usb_ep_free_request(ep, req); + return NULL; + } + + return req; +} + +static void adb_request_free(struct usb_request *req, struct usb_ep *ep) +{ + if (req) { + kfree(req->buf); + usb_ep_free_request(ep, req); + } +} + +static inline int adb_lock(atomic_t *excl) +{ + if (atomic_inc_return(excl) == 1) { + return 0; + } else { + atomic_dec(excl); + return -1; + } +} + +static inline void adb_unlock(atomic_t *excl) +{ + atomic_dec(excl); +} + +/* add a request to the tail of a list */ +void adb_req_put(struct adb_dev *dev, struct list_head *head, + struct usb_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + list_add_tail(&req->list, head); + spin_unlock_irqrestore(&dev->lock, flags); +} + +/* remove a request from the head of a list */ +struct usb_request *adb_req_get(struct adb_dev *dev, struct list_head *head) +{ + unsigned long flags; + struct usb_request *req; + + spin_lock_irqsave(&dev->lock, flags); + if (list_empty(head)) { + req = 0; + } else { + req = list_first_entry(head, struct usb_request, list); + list_del(&req->list); + } + spin_unlock_irqrestore(&dev->lock, flags); + return req; +} + +static void adb_complete_in(struct usb_ep *ep, struct usb_request *req) +{ + struct adb_dev *dev = _adb_dev; + + if (req->status != 0) + dev->error = 1; + + adb_req_put(dev, &dev->tx_idle, req); + + wake_up(&dev->write_wq); +} + +static void adb_complete_out(struct usb_ep *ep, struct usb_request *req) +{ + struct adb_dev *dev = _adb_dev; + + dev->rx_done = 1; + if (req->status != 0) + dev->error = 1; + + wake_up(&dev->read_wq); +} + +static int adb_create_bulk_endpoints(struct adb_dev *dev, + struct usb_endpoint_descriptor *in_desc, + struct usb_endpoint_descriptor *out_desc) +{ + struct usb_composite_dev *cdev = dev->cdev; + struct usb_request *req; + struct usb_ep *ep; + int i; + + DBG(cdev, "create_bulk_endpoints dev: %p\n", dev); + + ep = usb_ep_autoconfig(cdev->gadget, in_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_in failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_in = ep; + + ep = usb_ep_autoconfig(cdev->gadget, out_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for adb ep_out got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_out = ep; + + /* now allocate requests for our endpoints */ + req = adb_request_new(dev->ep_out, ADB_BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = adb_complete_out; + dev->rx_req = req; + + for (i = 0; i < TX_REQ_MAX; i++) { + req = adb_request_new(dev->ep_in, ADB_BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = adb_complete_in; + adb_req_put(dev, &dev->tx_idle, req); + } + + return 0; + +fail: + printk(KERN_ERR "adb_bind() could not allocate requests\n"); + return -1; +} + +static ssize_t adb_read(struct file *fp, char __user *buf, + size_t count, loff_t *pos) +{ + struct adb_dev *dev = fp->private_data; + struct usb_request *req; + int r = count, xfer; + int ret; + + pr_debug("adb_read(%d)\n", count); + if (!_adb_dev) + return -ENODEV; + + if (count > ADB_BULK_BUFFER_SIZE) + return -EINVAL; + + if (adb_lock(&dev->read_excl)) + return -EBUSY; + + /* we will block until we're online */ + while (!(dev->online || dev->error)) { + pr_debug("adb_read: waiting for online state\n"); + ret = wait_event_interruptible(dev->read_wq, + (dev->online || dev->error)); + if (ret < 0) { + adb_unlock(&dev->read_excl); + return ret; + } + } + if (dev->error) { + r = -EIO; + goto done; + } + +requeue_req: + /* queue a request */ + req = dev->rx_req; + req->length = count; + dev->rx_done = 0; + ret = usb_ep_queue(dev->ep_out, req, GFP_ATOMIC); + if (ret < 0) { + pr_debug("adb_read: failed to queue req %p (%d)\n", req, ret); + r = -EIO; + dev->error = 1; + goto done; + } else { + pr_debug("rx %p queue\n", req); + } + + /* wait for a request to complete */ + ret = wait_event_interruptible(dev->read_wq, dev->rx_done); + if (ret < 0) { + dev->error = 1; + r = ret; + usb_ep_dequeue(dev->ep_out, req); + goto done; + } + if (!dev->error) { + /* If we got a 0-len packet, throw it back and try again. */ + if (req->actual == 0) + goto requeue_req; + + pr_debug("rx %p %d\n", req, req->actual); + xfer = (req->actual < count) ? req->actual : count; + if (copy_to_user(buf, req->buf, xfer)) + r = -EFAULT; + + } else + r = -EIO; + +done: + adb_unlock(&dev->read_excl); + pr_debug("adb_read returning %d\n", r); + return r; +} + +static ssize_t adb_write(struct file *fp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct adb_dev *dev = fp->private_data; + struct usb_request *req = 0; + int r = count, xfer; + int ret; + + if (!_adb_dev) + return -ENODEV; + pr_debug("adb_write(%d)\n", count); + + if (adb_lock(&dev->write_excl)) + return -EBUSY; + + while (count > 0) { + if (dev->error) { + pr_debug("adb_write dev->error\n"); + r = -EIO; + break; + } + + /* get an idle tx request to use */ + req = 0; + ret = wait_event_interruptible(dev->write_wq, + (req = adb_req_get(dev, &dev->tx_idle)) || dev->error); + + if (ret < 0) { + r = ret; + break; + } + + if (req != 0) { + if (count > ADB_BULK_BUFFER_SIZE) + xfer = ADB_BULK_BUFFER_SIZE; + else + xfer = count; + if (copy_from_user(req->buf, buf, xfer)) { + r = -EFAULT; + break; + } + + req->length = xfer; + ret = usb_ep_queue(dev->ep_in, req, GFP_ATOMIC); + if (ret < 0) { + pr_debug("adb_write: xfer error %d\n", ret); + dev->error = 1; + r = -EIO; + break; + } + + buf += xfer; + count -= xfer; + + /* zero this so we don't try to free it on error exit */ + req = 0; + } + } + + if (req) + adb_req_put(dev, &dev->tx_idle, req); + + adb_unlock(&dev->write_excl); + pr_debug("adb_write returning %d\n", r); + return r; +} + +static int adb_open(struct inode *ip, struct file *fp) +{ + printk(KERN_INFO "adb_open\n"); + if (!_adb_dev) + return -ENODEV; + + if (adb_lock(&_adb_dev->open_excl)) + return -EBUSY; + + fp->private_data = _adb_dev; + + /* clear the error latch */ + _adb_dev->error = 0; + + return 0; +} + +static int adb_release(struct inode *ip, struct file *fp) +{ + printk(KERN_INFO "adb_release\n"); + adb_unlock(&_adb_dev->open_excl); + return 0; +} + +/* file operations for ADB device /dev/android_adb */ +static const struct file_operations adb_fops = { + .owner = THIS_MODULE, + .read = adb_read, + .write = adb_write, + .open = adb_open, + .release = adb_release, +}; + +static struct miscdevice adb_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = adb_shortname, + .fops = &adb_fops, +}; + + + + +static int +adb_function_bind(struct usb_configuration *c, struct usb_function *f) +{ + struct usb_composite_dev *cdev = c->cdev; + struct adb_dev *dev = func_to_adb(f); + int id; + int ret; + + dev->cdev = cdev; + DBG(cdev, "adb_function_bind dev: %p\n", dev); + + /* allocate interface ID(s) */ + id = usb_interface_id(c, f); + if (id < 0) + return id; + adb_interface_desc.bInterfaceNumber = id; + + /* allocate endpoints */ + ret = adb_create_bulk_endpoints(dev, &adb_fullspeed_in_desc, + &adb_fullspeed_out_desc); + if (ret) + return ret; + + /* support high speed hardware */ + if (gadget_is_dualspeed(c->cdev->gadget)) { + adb_highspeed_in_desc.bEndpointAddress = + adb_fullspeed_in_desc.bEndpointAddress; + adb_highspeed_out_desc.bEndpointAddress = + adb_fullspeed_out_desc.bEndpointAddress; + } + + DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n", + gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full", + f->name, dev->ep_in->name, dev->ep_out->name); + return 0; +} + +static void +adb_function_unbind(struct usb_configuration *c, struct usb_function *f) +{ + struct adb_dev *dev = func_to_adb(f); + struct usb_request *req; + + + dev->online = 0; + dev->error = 1; + + wake_up(&dev->read_wq); + + adb_request_free(dev->rx_req, dev->ep_out); + while ((req = adb_req_get(dev, &dev->tx_idle))) + adb_request_free(req, dev->ep_in); +} + +static int adb_function_set_alt(struct usb_function *f, + unsigned intf, unsigned alt) +{ + struct adb_dev *dev = func_to_adb(f); + struct usb_composite_dev *cdev = f->config->cdev; + int ret; + + DBG(cdev, "adb_function_set_alt intf: %d alt: %d\n", intf, alt); + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_in); + if (ret) + return ret; + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_out); + if (ret) { + usb_ep_disable(dev->ep_in); + return ret; + } + dev->online = 1; + + /* readers may be blocked waiting for us to go online */ + wake_up(&dev->read_wq); + return 0; +} + +static void adb_function_disable(struct usb_function *f) +{ + struct adb_dev *dev = func_to_adb(f); + struct usb_composite_dev *cdev = dev->cdev; + + DBG(cdev, "adb_function_disable cdev %p\n", cdev); + dev->online = 0; + dev->error = 1; + usb_ep_disable(dev->ep_in); + usb_ep_disable(dev->ep_out); + + /* readers may be blocked waiting for us to go online */ + wake_up(&dev->read_wq); + + VDBG(cdev, "%s disabled\n", dev->function.name); +} + +static int adb_bind_config(struct usb_configuration *c) +{ + struct adb_dev *dev = _adb_dev; + + printk(KERN_INFO "adb_bind_config\n"); + + dev->cdev = c->cdev; + dev->function.name = "adb"; + dev->function.descriptors = fs_adb_descs; + dev->function.hs_descriptors = hs_adb_descs; + dev->function.bind = adb_function_bind; + dev->function.unbind = adb_function_unbind; + dev->function.set_alt = adb_function_set_alt; + dev->function.disable = adb_function_disable; + + return usb_add_function(c, &dev->function); +} + +static int adb_setup(void) +{ + struct adb_dev *dev; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + spin_lock_init(&dev->lock); + + init_waitqueue_head(&dev->read_wq); + init_waitqueue_head(&dev->write_wq); + + atomic_set(&dev->open_excl, 0); + atomic_set(&dev->read_excl, 0); + atomic_set(&dev->write_excl, 0); + + INIT_LIST_HEAD(&dev->tx_idle); + + _adb_dev = dev; + + ret = misc_register(&adb_device); + if (ret) + goto err; + + return 0; + +err: + kfree(dev); + printk(KERN_ERR "adb gadget driver failed to initialize\n"); + return ret; +} + +static void adb_cleanup(void) +{ + misc_deregister(&adb_device); + + kfree(_adb_dev); + _adb_dev = NULL; +} -- GitLab From 3d9552718c700d1df0677be81895c6493f7ba618 Mon Sep 17 00:00:00 2001 From: Benoit Goby Date: Mon, 19 Dec 2011 14:39:37 -0800 Subject: [PATCH 0692/1442] ANDROID: usb: gadget: accessory: Add Android Accessory function USB accessory mode allows users to connect USB host hardware specifically designed for Android-powered devices. The accessories must adhere to the Android accessory protocol outlined in the http://accessories.android.com documentation. This allows Android devices that cannot act as a USB host to still interact with USB hardware. When an Android device is in USB accessory mode, the attached Android USB accessory acts as the host, provides power to the USB bus, and enumerates connected devices. Change-Id: I67964b50d278f3c0471d47efbb7b0973a3502681 Signed-off-by: Mike Lockwood --- drivers/usb/gadget/f_accessory.c | 796 +++++++++++++++++++++++++++++++ include/linux/usb/f_accessory.h | 83 ++++ 2 files changed, 879 insertions(+) create mode 100644 drivers/usb/gadget/f_accessory.c create mode 100644 include/linux/usb/f_accessory.h diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c new file mode 100644 index 000000000000..a5818227611a --- /dev/null +++ b/drivers/usb/gadget/f_accessory.c @@ -0,0 +1,796 @@ +/* + * Gadget Function Driver for Android USB accessories + * + * Copyright (C) 2011 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* #define DEBUG */ +/* #define VERBOSE_DEBUG */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#define BULK_BUFFER_SIZE 16384 +#define ACC_STRING_SIZE 256 + +#define PROTOCOL_VERSION 1 + +/* String IDs */ +#define INTERFACE_STRING_INDEX 0 + +/* number of tx and rx requests to allocate */ +#define TX_REQ_MAX 4 +#define RX_REQ_MAX 2 + +struct acc_dev { + struct usb_function function; + struct usb_composite_dev *cdev; + spinlock_t lock; + + struct usb_ep *ep_in; + struct usb_ep *ep_out; + + /* set to 1 when we connect */ + int online:1; + /* Set to 1 when we disconnect. + * Not cleared until our file is closed. + */ + int disconnected:1; + + /* strings sent by the host */ + char manufacturer[ACC_STRING_SIZE]; + char model[ACC_STRING_SIZE]; + char description[ACC_STRING_SIZE]; + char version[ACC_STRING_SIZE]; + char uri[ACC_STRING_SIZE]; + char serial[ACC_STRING_SIZE]; + + /* for acc_complete_set_string */ + int string_index; + + /* set to 1 if we have a pending start request */ + int start_requested; + + /* synchronize access to our device file */ + atomic_t open_excl; + + struct list_head tx_idle; + + wait_queue_head_t read_wq; + wait_queue_head_t write_wq; + struct usb_request *rx_req[RX_REQ_MAX]; + int rx_done; + struct delayed_work work; +}; + +static struct usb_interface_descriptor acc_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bInterfaceNumber = 0, + .bNumEndpoints = 2, + .bInterfaceClass = USB_CLASS_VENDOR_SPEC, + .bInterfaceSubClass = USB_SUBCLASS_VENDOR_SPEC, + .bInterfaceProtocol = 0, +}; + +static struct usb_endpoint_descriptor acc_highspeed_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(512), +}; + +static struct usb_endpoint_descriptor acc_highspeed_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(512), +}; + +static struct usb_endpoint_descriptor acc_fullspeed_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +static struct usb_endpoint_descriptor acc_fullspeed_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, +}; + +static struct usb_descriptor_header *fs_acc_descs[] = { + (struct usb_descriptor_header *) &acc_interface_desc, + (struct usb_descriptor_header *) &acc_fullspeed_in_desc, + (struct usb_descriptor_header *) &acc_fullspeed_out_desc, + NULL, +}; + +static struct usb_descriptor_header *hs_acc_descs[] = { + (struct usb_descriptor_header *) &acc_interface_desc, + (struct usb_descriptor_header *) &acc_highspeed_in_desc, + (struct usb_descriptor_header *) &acc_highspeed_out_desc, + NULL, +}; + +static struct usb_string acc_string_defs[] = { + [INTERFACE_STRING_INDEX].s = "Android Accessory Interface", + { }, /* end of list */ +}; + +static struct usb_gadget_strings acc_string_table = { + .language = 0x0409, /* en-US */ + .strings = acc_string_defs, +}; + +static struct usb_gadget_strings *acc_strings[] = { + &acc_string_table, + NULL, +}; + +/* temporary variable used between acc_open() and acc_gadget_bind() */ +static struct acc_dev *_acc_dev; + +static inline struct acc_dev *func_to_dev(struct usb_function *f) +{ + return container_of(f, struct acc_dev, function); +} + +static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size) +{ + struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) + return NULL; + + /* now allocate buffers for the requests */ + req->buf = kmalloc(buffer_size, GFP_KERNEL); + if (!req->buf) { + usb_ep_free_request(ep, req); + return NULL; + } + + return req; +} + +static void acc_request_free(struct usb_request *req, struct usb_ep *ep) +{ + if (req) { + kfree(req->buf); + usb_ep_free_request(ep, req); + } +} + +/* add a request to the tail of a list */ +static void req_put(struct acc_dev *dev, struct list_head *head, + struct usb_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + list_add_tail(&req->list, head); + spin_unlock_irqrestore(&dev->lock, flags); +} + +/* remove a request from the head of a list */ +static struct usb_request *req_get(struct acc_dev *dev, struct list_head *head) +{ + unsigned long flags; + struct usb_request *req; + + spin_lock_irqsave(&dev->lock, flags); + if (list_empty(head)) { + req = 0; + } else { + req = list_first_entry(head, struct usb_request, list); + list_del(&req->list); + } + spin_unlock_irqrestore(&dev->lock, flags); + return req; +} + +static void acc_set_disconnected(struct acc_dev *dev) +{ + dev->online = 0; + dev->disconnected = 1; +} + +static void acc_complete_in(struct usb_ep *ep, struct usb_request *req) +{ + struct acc_dev *dev = _acc_dev; + + if (req->status != 0) + acc_set_disconnected(dev); + + req_put(dev, &dev->tx_idle, req); + + wake_up(&dev->write_wq); +} + +static void acc_complete_out(struct usb_ep *ep, struct usb_request *req) +{ + struct acc_dev *dev = _acc_dev; + + dev->rx_done = 1; + if (req->status != 0) + acc_set_disconnected(dev); + + wake_up(&dev->read_wq); +} + +static void acc_complete_set_string(struct usb_ep *ep, struct usb_request *req) +{ + struct acc_dev *dev = ep->driver_data; + char *string_dest = NULL; + int length = req->actual; + + if (req->status != 0) { + pr_err("acc_complete_set_string, err %d\n", req->status); + return; + } + + switch (dev->string_index) { + case ACCESSORY_STRING_MANUFACTURER: + string_dest = dev->manufacturer; + break; + case ACCESSORY_STRING_MODEL: + string_dest = dev->model; + break; + case ACCESSORY_STRING_DESCRIPTION: + string_dest = dev->description; + break; + case ACCESSORY_STRING_VERSION: + string_dest = dev->version; + break; + case ACCESSORY_STRING_URI: + string_dest = dev->uri; + break; + case ACCESSORY_STRING_SERIAL: + string_dest = dev->serial; + break; + } + if (string_dest) { + unsigned long flags; + + if (length >= ACC_STRING_SIZE) + length = ACC_STRING_SIZE - 1; + + spin_lock_irqsave(&dev->lock, flags); + memcpy(string_dest, req->buf, length); + /* ensure zero termination */ + string_dest[length] = 0; + spin_unlock_irqrestore(&dev->lock, flags); + } else { + pr_err("unknown accessory string index %d\n", + dev->string_index); + } +} + +static int __init create_bulk_endpoints(struct acc_dev *dev, + struct usb_endpoint_descriptor *in_desc, + struct usb_endpoint_descriptor *out_desc) +{ + struct usb_composite_dev *cdev = dev->cdev; + struct usb_request *req; + struct usb_ep *ep; + int i; + + DBG(cdev, "create_bulk_endpoints dev: %p\n", dev); + + ep = usb_ep_autoconfig(cdev->gadget, in_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_in failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_in = ep; + + ep = usb_ep_autoconfig(cdev->gadget, out_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for ep_out got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_out = ep; + + ep = usb_ep_autoconfig(cdev->gadget, out_desc); + if (!ep) { + DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); + return -ENODEV; + } + DBG(cdev, "usb_ep_autoconfig for ep_out got %s\n", ep->name); + ep->driver_data = dev; /* claim the endpoint */ + dev->ep_out = ep; + + /* now allocate requests for our endpoints */ + for (i = 0; i < TX_REQ_MAX; i++) { + req = acc_request_new(dev->ep_in, BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = acc_complete_in; + req_put(dev, &dev->tx_idle, req); + } + for (i = 0; i < RX_REQ_MAX; i++) { + req = acc_request_new(dev->ep_out, BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = acc_complete_out; + dev->rx_req[i] = req; + } + + return 0; + +fail: + printk(KERN_ERR "acc_bind() could not allocate requests\n"); + while ((req = req_get(dev, &dev->tx_idle))) + acc_request_free(req, dev->ep_in); + for (i = 0; i < RX_REQ_MAX; i++) + acc_request_free(dev->rx_req[i], dev->ep_out); + return -1; +} + +static ssize_t acc_read(struct file *fp, char __user *buf, + size_t count, loff_t *pos) +{ + struct acc_dev *dev = fp->private_data; + struct usb_request *req; + int r = count, xfer; + int ret = 0; + + pr_debug("acc_read(%d)\n", count); + + if (dev->disconnected) + return -ENODEV; + + if (count > BULK_BUFFER_SIZE) + count = BULK_BUFFER_SIZE; + + /* we will block until we're online */ + pr_debug("acc_read: waiting for online\n"); + ret = wait_event_interruptible(dev->read_wq, dev->online); + if (ret < 0) { + r = ret; + goto done; + } + +requeue_req: + /* queue a request */ + req = dev->rx_req[0]; + req->length = count; + dev->rx_done = 0; + ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL); + if (ret < 0) { + r = -EIO; + goto done; + } else { + pr_debug("rx %p queue\n", req); + } + + /* wait for a request to complete */ + ret = wait_event_interruptible(dev->read_wq, dev->rx_done); + if (ret < 0) { + r = ret; + usb_ep_dequeue(dev->ep_out, req); + goto done; + } + if (dev->online) { + /* If we got a 0-len packet, throw it back and try again. */ + if (req->actual == 0) + goto requeue_req; + + pr_debug("rx %p %d\n", req, req->actual); + xfer = (req->actual < count) ? req->actual : count; + r = xfer; + if (copy_to_user(buf, req->buf, xfer)) + r = -EFAULT; + } else + r = -EIO; + +done: + pr_debug("acc_read returning %d\n", r); + return r; +} + +static ssize_t acc_write(struct file *fp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct acc_dev *dev = fp->private_data; + struct usb_request *req = 0; + int r = count, xfer; + int ret; + + pr_debug("acc_write(%d)\n", count); + + if (!dev->online || dev->disconnected) + return -ENODEV; + + while (count > 0) { + if (!dev->online) { + pr_debug("acc_write dev->error\n"); + r = -EIO; + break; + } + + /* get an idle tx request to use */ + req = 0; + ret = wait_event_interruptible(dev->write_wq, + ((req = req_get(dev, &dev->tx_idle)) || !dev->online)); + if (!req) { + r = ret; + break; + } + + if (count > BULK_BUFFER_SIZE) + xfer = BULK_BUFFER_SIZE; + else + xfer = count; + if (copy_from_user(req->buf, buf, xfer)) { + r = -EFAULT; + break; + } + + req->length = xfer; + ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL); + if (ret < 0) { + pr_debug("acc_write: xfer error %d\n", ret); + r = -EIO; + break; + } + + buf += xfer; + count -= xfer; + + /* zero this so we don't try to free it on error exit */ + req = 0; + } + + if (req) + req_put(dev, &dev->tx_idle, req); + + pr_debug("acc_write returning %d\n", r); + return r; +} + +static long acc_ioctl(struct file *fp, unsigned code, unsigned long value) +{ + struct acc_dev *dev = fp->private_data; + char *src = NULL; + int ret; + + switch (code) { + case ACCESSORY_GET_STRING_MANUFACTURER: + src = dev->manufacturer; + break; + case ACCESSORY_GET_STRING_MODEL: + src = dev->model; + break; + case ACCESSORY_GET_STRING_DESCRIPTION: + src = dev->description; + break; + case ACCESSORY_GET_STRING_VERSION: + src = dev->version; + break; + case ACCESSORY_GET_STRING_URI: + src = dev->uri; + break; + case ACCESSORY_GET_STRING_SERIAL: + src = dev->serial; + break; + case ACCESSORY_IS_START_REQUESTED: + return dev->start_requested; + } + if (!src) + return -EINVAL; + + ret = strlen(src) + 1; + if (copy_to_user((void __user *)value, src, ret)) + ret = -EFAULT; + return ret; +} + +static int acc_open(struct inode *ip, struct file *fp) +{ + printk(KERN_INFO "acc_open\n"); + if (atomic_xchg(&_acc_dev->open_excl, 1)) + return -EBUSY; + + _acc_dev->disconnected = 0; + fp->private_data = _acc_dev; + return 0; +} + +static int acc_release(struct inode *ip, struct file *fp) +{ + printk(KERN_INFO "acc_release\n"); + + WARN_ON(!atomic_xchg(&_acc_dev->open_excl, 0)); + _acc_dev->disconnected = 0; + return 0; +} + +/* file operations for /dev/acc_usb */ +static const struct file_operations acc_fops = { + .owner = THIS_MODULE, + .read = acc_read, + .write = acc_write, + .unlocked_ioctl = acc_ioctl, + .open = acc_open, + .release = acc_release, +}; + +static struct miscdevice acc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "usb_accessory", + .fops = &acc_fops, +}; + + +static int acc_ctrlrequest(struct usb_composite_dev *cdev, + const struct usb_ctrlrequest *ctrl) +{ + struct acc_dev *dev = _acc_dev; + int value = -EOPNOTSUPP; + u8 b_requestType = ctrl->bRequestType; + u8 b_request = ctrl->bRequest; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_value = le16_to_cpu(ctrl->wValue); + u16 w_length = le16_to_cpu(ctrl->wLength); + +/* + printk(KERN_INFO "acc_ctrlrequest " + "%02x.%02x v%04x i%04x l%u\n", + b_requestType, b_request, + w_value, w_index, w_length); +*/ + + if (b_requestType == (USB_DIR_OUT | USB_TYPE_VENDOR)) { + if (b_request == ACCESSORY_START) { + dev->start_requested = 1; + schedule_delayed_work( + &dev->work, msecs_to_jiffies(10)); + value = 0; + } else if (b_request == ACCESSORY_SEND_STRING) { + dev->string_index = w_index; + cdev->gadget->ep0->driver_data = dev; + cdev->req->complete = acc_complete_set_string; + value = w_length; + } + } else if (b_requestType == (USB_DIR_IN | USB_TYPE_VENDOR)) { + if (b_request == ACCESSORY_GET_PROTOCOL) { + *((u16 *)cdev->req->buf) = PROTOCOL_VERSION; + value = sizeof(u16); + + /* clear any string left over from a previous session */ + memset(dev->manufacturer, 0, sizeof(dev->manufacturer)); + memset(dev->model, 0, sizeof(dev->model)); + memset(dev->description, 0, sizeof(dev->description)); + memset(dev->version, 0, sizeof(dev->version)); + memset(dev->uri, 0, sizeof(dev->uri)); + memset(dev->serial, 0, sizeof(dev->serial)); + dev->start_requested = 0; + } + } + + if (value >= 0) { + cdev->req->zero = 0; + cdev->req->length = value; + value = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC); + if (value < 0) + ERROR(cdev, "%s setup response queue error\n", + __func__); + } + + if (value == -EOPNOTSUPP) + VDBG(cdev, + "unknown class-specific control req " + "%02x.%02x v%04x i%04x l%u\n", + ctrl->bRequestType, ctrl->bRequest, + w_value, w_index, w_length); + return value; +} + +static int +acc_function_bind(struct usb_configuration *c, struct usb_function *f) +{ + struct usb_composite_dev *cdev = c->cdev; + struct acc_dev *dev = func_to_dev(f); + int id; + int ret; + + DBG(cdev, "acc_function_bind dev: %p\n", dev); + + dev->start_requested = 0; + + /* allocate interface ID(s) */ + id = usb_interface_id(c, f); + if (id < 0) + return id; + acc_interface_desc.bInterfaceNumber = id; + + /* allocate endpoints */ + ret = create_bulk_endpoints(dev, &acc_fullspeed_in_desc, + &acc_fullspeed_out_desc); + if (ret) + return ret; + + /* support high speed hardware */ + if (gadget_is_dualspeed(c->cdev->gadget)) { + acc_highspeed_in_desc.bEndpointAddress = + acc_fullspeed_in_desc.bEndpointAddress; + acc_highspeed_out_desc.bEndpointAddress = + acc_fullspeed_out_desc.bEndpointAddress; + } + + DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n", + gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full", + f->name, dev->ep_in->name, dev->ep_out->name); + return 0; +} + +static void +acc_function_unbind(struct usb_configuration *c, struct usb_function *f) +{ + struct acc_dev *dev = func_to_dev(f); + struct usb_request *req; + int i; + + while ((req = req_get(dev, &dev->tx_idle))) + acc_request_free(req, dev->ep_in); + for (i = 0; i < RX_REQ_MAX; i++) + acc_request_free(dev->rx_req[i], dev->ep_out); +} + +static void acc_work(struct work_struct *data) +{ + char *envp[2] = { "ACCESSORY=START", NULL }; + kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp); +} + +static int acc_function_set_alt(struct usb_function *f, + unsigned intf, unsigned alt) +{ + struct acc_dev *dev = func_to_dev(f); + struct usb_composite_dev *cdev = f->config->cdev; + int ret; + + DBG(cdev, "acc_function_set_alt intf: %d alt: %d\n", intf, alt); + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_in); + if (ret) + return ret; + + ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out); + if (ret) + return ret; + + ret = usb_ep_enable(dev->ep_out); + if (ret) { + usb_ep_disable(dev->ep_in); + return ret; + } + + dev->online = 1; + + /* readers may be blocked waiting for us to go online */ + wake_up(&dev->read_wq); + return 0; +} + +static void acc_function_disable(struct usb_function *f) +{ + struct acc_dev *dev = func_to_dev(f); + struct usb_composite_dev *cdev = dev->cdev; + + DBG(cdev, "acc_function_disable\n"); + acc_set_disconnected(dev); + usb_ep_disable(dev->ep_in); + usb_ep_disable(dev->ep_out); + + /* readers may be blocked waiting for us to go online */ + wake_up(&dev->read_wq); + + VDBG(cdev, "%s disabled\n", dev->function.name); +} + +static int acc_bind_config(struct usb_configuration *c) +{ + struct acc_dev *dev = _acc_dev; + int ret; + + printk(KERN_INFO "acc_bind_config\n"); + + /* allocate a string ID for our interface */ + if (acc_string_defs[INTERFACE_STRING_INDEX].id == 0) { + ret = usb_string_id(c->cdev); + if (ret < 0) + return ret; + acc_string_defs[INTERFACE_STRING_INDEX].id = ret; + acc_interface_desc.iInterface = ret; + } + + dev->cdev = c->cdev; + dev->function.name = "accessory"; + dev->function.strings = acc_strings, + dev->function.descriptors = fs_acc_descs; + dev->function.hs_descriptors = hs_acc_descs; + dev->function.bind = acc_function_bind; + dev->function.unbind = acc_function_unbind; + dev->function.set_alt = acc_function_set_alt; + dev->function.disable = acc_function_disable; + + return usb_add_function(c, &dev->function); +} + +static int acc_setup(void) +{ + struct acc_dev *dev; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + spin_lock_init(&dev->lock); + init_waitqueue_head(&dev->read_wq); + init_waitqueue_head(&dev->write_wq); + atomic_set(&dev->open_excl, 0); + INIT_LIST_HEAD(&dev->tx_idle); + INIT_DELAYED_WORK(&dev->work, acc_work); + + /* _acc_dev must be set before calling usb_gadget_register_driver */ + _acc_dev = dev; + + ret = misc_register(&acc_device); + if (ret) + goto err; + + return 0; + +err: + kfree(dev); + printk(KERN_ERR "USB accessory gadget driver failed to initialize\n"); + return ret; +} + +static void acc_cleanup(void) +{ + misc_deregister(&acc_device); + kfree(_acc_dev); + _acc_dev = NULL; +} diff --git a/include/linux/usb/f_accessory.h b/include/linux/usb/f_accessory.h new file mode 100644 index 000000000000..5b2dcf9728e1 --- /dev/null +++ b/include/linux/usb/f_accessory.h @@ -0,0 +1,83 @@ +/* + * Gadget Function Driver for Android USB accessories + * + * Copyright (C) 2011 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __LINUX_USB_F_ACCESSORY_H +#define __LINUX_USB_F_ACCESSORY_H + +/* Use Google Vendor ID when in accessory mode */ +#define USB_ACCESSORY_VENDOR_ID 0x18D1 + + +/* Product ID to use when in accessory mode */ +#define USB_ACCESSORY_PRODUCT_ID 0x2D00 + +/* Product ID to use when in accessory mode and adb is enabled */ +#define USB_ACCESSORY_ADB_PRODUCT_ID 0x2D01 + +/* Indexes for strings sent by the host via ACCESSORY_SEND_STRING */ +#define ACCESSORY_STRING_MANUFACTURER 0 +#define ACCESSORY_STRING_MODEL 1 +#define ACCESSORY_STRING_DESCRIPTION 2 +#define ACCESSORY_STRING_VERSION 3 +#define ACCESSORY_STRING_URI 4 +#define ACCESSORY_STRING_SERIAL 5 + +/* Control request for retrieving device's protocol version (currently 1) + * + * requestType: USB_DIR_IN | USB_TYPE_VENDOR + * request: ACCESSORY_GET_PROTOCOL + * value: 0 + * index: 0 + * data version number (16 bits little endian) + */ +#define ACCESSORY_GET_PROTOCOL 51 + +/* Control request for host to send a string to the device + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SEND_STRING + * value: 0 + * index: string ID + * data zero terminated UTF8 string + * + * The device can later retrieve these strings via the + * ACCESSORY_GET_STRING_* ioctls + */ +#define ACCESSORY_SEND_STRING 52 + +/* Control request for starting device in accessory mode. + * The host sends this after setting all its strings to the device. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_START + * value: 0 + * index: 0 + * data none + */ +#define ACCESSORY_START 53 + +/* ioctls for retrieving strings set by the host */ +#define ACCESSORY_GET_STRING_MANUFACTURER _IOW('M', 1, char[256]) +#define ACCESSORY_GET_STRING_MODEL _IOW('M', 2, char[256]) +#define ACCESSORY_GET_STRING_DESCRIPTION _IOW('M', 3, char[256]) +#define ACCESSORY_GET_STRING_VERSION _IOW('M', 4, char[256]) +#define ACCESSORY_GET_STRING_URI _IOW('M', 5, char[256]) +#define ACCESSORY_GET_STRING_SERIAL _IOW('M', 6, char[256]) +/* returns 1 if there is a start request pending */ +#define ACCESSORY_IS_START_REQUESTED _IO('M', 7) + +#endif /* __LINUX_USB_F_ACCESSORY_H */ -- GitLab From a8dd454be9f66860be10fb0ca3a8a62e6a55e396 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 5 Mar 2012 13:29:45 -0800 Subject: [PATCH 0693/1442] ANDROID: usb: gadget: adb: allow freezing in adb_read wait_event_interruptible in adb_read might return -ERESTARTSYS if userspace is frozen during adb_read or another signal is delivered to adb. If so, don't set dev->error to avoid resetting the adb connection. Change-Id: I5a7baa013a9a3a3b5305de7e6a0d18546a560018 Signed-off-by: Colin Cross --- drivers/usb/gadget/f_adb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/f_adb.c b/drivers/usb/gadget/f_adb.c index 5415353ab2c1..4433a4dbdfb1 100644 --- a/drivers/usb/gadget/f_adb.c +++ b/drivers/usb/gadget/f_adb.c @@ -313,7 +313,8 @@ static ssize_t adb_read(struct file *fp, char __user *buf, /* wait for a request to complete */ ret = wait_event_interruptible(dev->read_wq, dev->rx_done); if (ret < 0) { - dev->error = 1; + if (ret != -ERESTARTSYS) + dev->error = 1; r = ret; usb_ep_dequeue(dev->ep_out, req); goto done; -- GitLab From e44897c3b83d3f0896934c2f1c97f8a7f37f1007 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 8 Mar 2012 17:57:51 -0800 Subject: [PATCH 0694/1442] ANDROID: usb: gadget: adb: do not set error flag when dequeuing req When an ep_out req is dequeued because of userspace freezing, don't set the error flag. Change-Id: I680f1a1059b8ac2244aaa069e7d42dc44abf98e9 Signed-off-by: Colin Cross --- drivers/usb/gadget/f_adb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/f_adb.c b/drivers/usb/gadget/f_adb.c index 4433a4dbdfb1..3827715f832d 100644 --- a/drivers/usb/gadget/f_adb.c +++ b/drivers/usb/gadget/f_adb.c @@ -205,7 +205,7 @@ static void adb_complete_out(struct usb_ep *ep, struct usb_request *req) struct adb_dev *dev = _adb_dev; dev->rx_done = 1; - if (req->status != 0) + if (req->status != 0 && req->status != -ECONNRESET) dev->error = 1; wake_up(&dev->read_wq); -- GitLab From 611b8d40c982025f0cf334638c6cce1b13950027 Mon Sep 17 00:00:00 2001 From: Benoit Goby Date: Mon, 19 Mar 2012 18:56:52 -0700 Subject: [PATCH 0695/1442] ANDROID: usb: gadget: adb: Only enable the gadget when adbd is ready When adb is enabled, only connect the gadget when adbd is ready. If adbd dies or is restarted (e.g. "adb root"), the gadget is disconnected when the adb device is close, and it is re-connected once adb re-open the device. - Add callbacks to adb, similar to FunctionFs callbacks, to notify the gadget when the daemon is ready or closed. - Refcount calls to android_enable/android_disable to enable the gadget only once all the function daemons are ready. - Add enable/disble to android_usb_function to notify the function when it is added/removed from the list of enabled functions. Change-Id: Id54ff85aec9cf8715c94b4f9bd6137a79ad58bfc Signed-off-by: Benoit Goby --- drivers/usb/gadget/f_adb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/f_adb.c b/drivers/usb/gadget/f_adb.c index 3827715f832d..1629ffb5b979 100644 --- a/drivers/usb/gadget/f_adb.c +++ b/drivers/usb/gadget/f_adb.c @@ -111,6 +111,8 @@ static struct usb_descriptor_header *hs_adb_descs[] = { NULL, }; +static void adb_ready_callback(void); +static void adb_closed_callback(void); /* temporary variable used between adb_open() and adb_gadget_bind() */ static struct adb_dev *_adb_dev; @@ -407,7 +409,7 @@ static ssize_t adb_write(struct file *fp, const char __user *buf, static int adb_open(struct inode *ip, struct file *fp) { - printk(KERN_INFO "adb_open\n"); + pr_info("adb_open\n"); if (!_adb_dev) return -ENODEV; @@ -419,12 +421,17 @@ static int adb_open(struct inode *ip, struct file *fp) /* clear the error latch */ _adb_dev->error = 0; + adb_ready_callback(); + return 0; } static int adb_release(struct inode *ip, struct file *fp) { - printk(KERN_INFO "adb_release\n"); + pr_info("adb_release\n"); + + adb_closed_callback(); + adb_unlock(&_adb_dev->open_excl); return 0; } -- GitLab From 3f9860a6551f132fb25e9bbff60506b110127a82 Mon Sep 17 00:00:00 2001 From: Benoit Goby Date: Thu, 10 May 2012 16:41:40 -0700 Subject: [PATCH 0696/1442] ANDROID: usb: otg: otg-wakelock: Fix build for 3.4 Change-Id: I97e21e9e6645bf18522675039e512f85fe836794 Signed-off-by: Benoit Goby --- drivers/usb/phy/otg-wakelock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c index ffd8d8aa5dc8..e17e27299062 100644 --- a/drivers/usb/phy/otg-wakelock.c +++ b/drivers/usb/phy/otg-wakelock.c @@ -25,7 +25,7 @@ #define TEMPORARY_HOLD_TIME 2000 static bool enabled = true; -static struct otg_transceiver *otgwl_xceiv; +static struct usb_phy *otgwl_xceiv; static struct notifier_block otgwl_nb; /* @@ -139,10 +139,10 @@ static int __init otg_wakelock_init(void) { int ret; - otgwl_xceiv = otg_get_transceiver(); + otgwl_xceiv = usb_get_transceiver(); if (!otgwl_xceiv) { - pr_err("%s: No OTG transceiver found\n", __func__); + pr_err("%s: No USB transceiver found\n", __func__); return -ENODEV; } @@ -152,10 +152,10 @@ static int __init otg_wakelock_init(void) vbus_lock.name); otgwl_nb.notifier_call = otgwl_otg_notifications; - ret = otg_register_notifier(otgwl_xceiv, &otgwl_nb); + ret = usb_register_notifier(otgwl_xceiv, &otgwl_nb); if (ret) { - pr_err("%s: otg_register_notifier on transceiver %s" + pr_err("%s: usb_register_notifier on transceiver %s" " failed\n", __func__, dev_name(otgwl_xceiv->dev)); otgwl_xceiv = NULL; -- GitLab From ceb2f0aac6246c9a961b8abbbf428d5950a47351 Mon Sep 17 00:00:00 2001 From: Benoit Goby Date: Fri, 22 Jun 2012 18:17:10 -0700 Subject: [PATCH 0697/1442] ANDROID: usb: gadget: accessory: Fix section mismatch create_bulk_endpoints should not be __init since it is called when accessory is enabled Change-Id: If827a4531f0f6c15af938345163923186368e2a5 Signed-off-by: Benoit Goby --- drivers/usb/gadget/f_accessory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index a5818227611a..04ceb0996560 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -296,7 +296,7 @@ static void acc_complete_set_string(struct usb_ep *ep, struct usb_request *req) } } -static int __init create_bulk_endpoints(struct acc_dev *dev, +static int create_bulk_endpoints(struct acc_dev *dev, struct usb_endpoint_descriptor *in_desc, struct usb_endpoint_descriptor *out_desc) { -- GitLab From b16cf28e32989b91472c4c18d0188fa0c18d5f86 Mon Sep 17 00:00:00 2001 From: Mike Lockwood Date: Fri, 11 May 2012 09:00:40 -0700 Subject: [PATCH 0698/1442] ANDROID: USB: gadget: Add ACCESSORY_SET_AUDIO_MODE control request and ioctl The control request will be used by the host to enable/disable USB audio and the ioctl will be used by userspace to read the audio mode Change-Id: I81c38611b588451e80eacdccc417ca6e11c60cab Signed-off-by: Mike Lockwood --- drivers/usb/gadget/f_accessory.c | 11 ++++++++++- include/linux/usb/f_accessory.h | 17 ++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 04ceb0996560..07c29d9b9264 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -40,7 +40,7 @@ #define BULK_BUFFER_SIZE 16384 #define ACC_STRING_SIZE 256 -#define PROTOCOL_VERSION 1 +#define PROTOCOL_VERSION 2 /* String IDs */ #define INTERFACE_STRING_INDEX 0 @@ -78,6 +78,8 @@ struct acc_dev { /* set to 1 if we have a pending start request */ int start_requested; + int audio_mode; + /* synchronize access to our device file */ atomic_t open_excl; @@ -510,6 +512,8 @@ static long acc_ioctl(struct file *fp, unsigned code, unsigned long value) break; case ACCESSORY_IS_START_REQUESTED: return dev->start_requested; + case ACCESSORY_GET_AUDIO_MODE: + return dev->audio_mode; } if (!src) return -EINVAL; @@ -586,6 +590,10 @@ static int acc_ctrlrequest(struct usb_composite_dev *cdev, cdev->gadget->ep0->driver_data = dev; cdev->req->complete = acc_complete_set_string; value = w_length; + } else if (b_request == ACCESSORY_SET_AUDIO_MODE && + w_index == 0 && w_length == 0) { + dev->audio_mode = w_value; + value = 0; } } else if (b_requestType == (USB_DIR_IN | USB_TYPE_VENDOR)) { if (b_request == ACCESSORY_GET_PROTOCOL) { @@ -600,6 +608,7 @@ static int acc_ctrlrequest(struct usb_composite_dev *cdev, memset(dev->uri, 0, sizeof(dev->uri)); memset(dev->serial, 0, sizeof(dev->serial)); dev->start_requested = 0; + dev->audio_mode = 0; } } diff --git a/include/linux/usb/f_accessory.h b/include/linux/usb/f_accessory.h index 5b2dcf9728e1..ddb2fd0e88fb 100644 --- a/include/linux/usb/f_accessory.h +++ b/include/linux/usb/f_accessory.h @@ -36,13 +36,15 @@ #define ACCESSORY_STRING_URI 4 #define ACCESSORY_STRING_SERIAL 5 -/* Control request for retrieving device's protocol version (currently 1) +/* Control request for retrieving device's protocol version * * requestType: USB_DIR_IN | USB_TYPE_VENDOR * request: ACCESSORY_GET_PROTOCOL * value: 0 * index: 0 * data version number (16 bits little endian) + * 1 for original accessory support + * 2 adds device to host audio support */ #define ACCESSORY_GET_PROTOCOL 51 @@ -70,6 +72,17 @@ */ #define ACCESSORY_START 53 +/* Control request for setting the audio mode. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SET_AUDIO_MODE + * value: 0 - no audio + * 1 - device to host, 44100 16-bit stereo PCM + * index: 0 + * data none + */ +#define ACCESSORY_SET_AUDIO_MODE 58 + /* ioctls for retrieving strings set by the host */ #define ACCESSORY_GET_STRING_MANUFACTURER _IOW('M', 1, char[256]) #define ACCESSORY_GET_STRING_MODEL _IOW('M', 2, char[256]) @@ -79,5 +92,7 @@ #define ACCESSORY_GET_STRING_SERIAL _IOW('M', 6, char[256]) /* returns 1 if there is a start request pending */ #define ACCESSORY_IS_START_REQUESTED _IO('M', 7) +/* returns audio mode (set via the ACCESSORY_SET_AUDIO_MODE control request) */ +#define ACCESSORY_GET_AUDIO_MODE _IO('M', 8) #endif /* __LINUX_USB_F_ACCESSORY_H */ -- GitLab From 205115de6f67a5d12ee2a818de3a0c785d25a90a Mon Sep 17 00:00:00 2001 From: Mike Lockwood Date: Mon, 26 Mar 2012 11:03:55 -0700 Subject: [PATCH 0699/1442] ANDROID: USB: gadget: f_accessory: Add support for HID input devices Change-Id: I4f1452db32508382df52acdc47c0eb395ae328c7 Signed-off-by: Mike Lockwood --- drivers/usb/gadget/f_accessory.c | 391 ++++++++++++++++++++++++++++++- include/linux/usb/f_accessory.h | 50 +++- 2 files changed, 432 insertions(+), 9 deletions(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 07c29d9b9264..921db5a193d6 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -33,6 +33,8 @@ #include #include +#include +#include #include #include #include @@ -49,6 +51,20 @@ #define TX_REQ_MAX 4 #define RX_REQ_MAX 2 +struct acc_hid_dev { + struct list_head list; + struct hid_device *hid; + struct acc_dev *dev; + /* accessory defined ID */ + int id; + /* HID report descriptor */ + u8 *report_desc; + /* length of HID report descriptor */ + int report_desc_len; + /* number of bytes of report_desc we have received so far */ + int report_desc_offset; +}; + struct acc_dev { struct usb_function function; struct usb_composite_dev *cdev; @@ -89,7 +105,21 @@ struct acc_dev { wait_queue_head_t write_wq; struct usb_request *rx_req[RX_REQ_MAX]; int rx_done; - struct delayed_work work; + + /* delayed work for handling ACCESSORY_START */ + struct delayed_work start_work; + + /* worker for registering and unregistering hid devices */ + struct work_struct hid_work; + + /* list of active HID devices */ + struct list_head hid_list; + + /* list of new HID devices to register */ + struct list_head new_hid_list; + + /* list of dead HID devices to unregister */ + struct list_head dead_hid_list; }; static struct usb_interface_descriptor acc_interface_desc = { @@ -298,7 +328,161 @@ static void acc_complete_set_string(struct usb_ep *ep, struct usb_request *req) } } -static int create_bulk_endpoints(struct acc_dev *dev, +static void acc_complete_set_hid_report_desc(struct usb_ep *ep, + struct usb_request *req) +{ + struct acc_hid_dev *hid = req->context; + struct acc_dev *dev = hid->dev; + int length = req->actual; + + if (req->status != 0) { + pr_err("acc_complete_set_hid_report_desc, err %d\n", + req->status); + return; + } + + memcpy(hid->report_desc + hid->report_desc_offset, req->buf, length); + hid->report_desc_offset += length; + if (hid->report_desc_offset == hid->report_desc_len) { + /* After we have received the entire report descriptor + * we schedule work to initialize the HID device + */ + schedule_work(&dev->hid_work); + } +} + +static void acc_complete_send_hid_event(struct usb_ep *ep, + struct usb_request *req) +{ + struct acc_hid_dev *hid = req->context; + int length = req->actual; + + if (req->status != 0) { + pr_err("acc_complete_send_hid_event, err %d\n", req->status); + return; + } + + hid_report_raw_event(hid->hid, HID_INPUT_REPORT, req->buf, length, 1); +} + +static int acc_hid_parse(struct hid_device *hid) +{ + struct acc_hid_dev *hdev = hid->driver_data; + + hid_parse_report(hid, hdev->report_desc, hdev->report_desc_len); + return 0; +} + +static int acc_hid_start(struct hid_device *hid) +{ + return 0; +} + +static void acc_hid_stop(struct hid_device *hid) +{ +} + +static int acc_hid_open(struct hid_device *hid) +{ + return 0; +} + +static void acc_hid_close(struct hid_device *hid) +{ +} + +static struct hid_ll_driver acc_hid_ll_driver = { + .parse = acc_hid_parse, + .start = acc_hid_start, + .stop = acc_hid_stop, + .open = acc_hid_open, + .close = acc_hid_close, +}; + +static struct acc_hid_dev *acc_hid_new(struct acc_dev *dev, + int id, int desc_len) +{ + struct acc_hid_dev *hdev; + + hdev = kzalloc(sizeof(*hdev), GFP_ATOMIC); + if (!hdev) + return NULL; + hdev->report_desc = kzalloc(desc_len, GFP_ATOMIC); + if (!hdev->report_desc) { + kfree(hdev); + return NULL; + } + hdev->dev = dev; + hdev->id = id; + hdev->report_desc_len = desc_len; + + return hdev; +} + +static struct acc_hid_dev *acc_hid_get(struct list_head *list, int id) +{ + struct acc_hid_dev *hid; + + list_for_each_entry(hid, list, list) { + if (hid->id == id) + return hid; + } + return NULL; +} + +static int acc_register_hid(struct acc_dev *dev, int id, int desc_length) +{ + struct acc_hid_dev *hid; + unsigned long flags; + + /* report descriptor length must be > 0 */ + if (desc_length <= 0) + return -EINVAL; + + spin_lock_irqsave(&dev->lock, flags); + /* replace HID if one already exists with this ID */ + hid = acc_hid_get(&dev->hid_list, id); + if (!hid) + hid = acc_hid_get(&dev->new_hid_list, id); + if (hid) + list_move(&hid->list, &dev->dead_hid_list); + + hid = acc_hid_new(dev, id, desc_length); + if (!hid) { + spin_unlock_irqrestore(&dev->lock, flags); + return -ENOMEM; + } + + list_add(&hid->list, &dev->new_hid_list); + spin_unlock_irqrestore(&dev->lock, flags); + + /* schedule work to register the HID device */ + schedule_work(&dev->hid_work); + return 0; +} + +static int acc_unregister_hid(struct acc_dev *dev, int id) +{ + struct acc_hid_dev *hid; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + hid = acc_hid_get(&dev->hid_list, id); + if (!hid) + hid = acc_hid_get(&dev->new_hid_list, id); + if (!hid) { + spin_unlock_irqrestore(&dev->lock, flags); + return -EINVAL; + } + + list_move(&hid->list, &dev->dead_hid_list); + spin_unlock_irqrestore(&dev->lock, flags); + + schedule_work(&dev->hid_work); + return 0; +} + +static int __init create_bulk_endpoints(struct acc_dev *dev, struct usb_endpoint_descriptor *in_desc, struct usb_endpoint_descriptor *out_desc) { @@ -355,7 +539,7 @@ static int create_bulk_endpoints(struct acc_dev *dev, return 0; fail: - printk(KERN_ERR "acc_bind() could not allocate requests\n"); + pr_err("acc_bind() could not allocate requests\n"); while ((req = req_get(dev, &dev->tx_idle))) acc_request_free(req, dev->ep_in); for (i = 0; i < RX_REQ_MAX; i++) @@ -544,7 +728,7 @@ static int acc_release(struct inode *ip, struct file *fp) return 0; } -/* file operations for /dev/acc_usb */ +/* file operations for /dev/usb_accessory */ static const struct file_operations acc_fops = { .owner = THIS_MODULE, .read = acc_read, @@ -554,23 +738,47 @@ static const struct file_operations acc_fops = { .release = acc_release, }; +static int acc_hid_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + int ret; + + ret = hid_parse(hdev); + if (ret) + return ret; + return hid_hw_start(hdev, HID_CONNECT_DEFAULT); +} + static struct miscdevice acc_device = { .minor = MISC_DYNAMIC_MINOR, .name = "usb_accessory", .fops = &acc_fops, }; +static const struct hid_device_id acc_hid_table[] = { + { HID_USB_DEVICE(HID_ANY_ID, HID_ANY_ID) }, + { } +}; + +static struct hid_driver acc_hid_driver = { + .name = "USB accessory", + .id_table = acc_hid_table, + .probe = acc_hid_probe, +}; static int acc_ctrlrequest(struct usb_composite_dev *cdev, const struct usb_ctrlrequest *ctrl) { struct acc_dev *dev = _acc_dev; int value = -EOPNOTSUPP; + struct acc_hid_dev *hid; + int offset; u8 b_requestType = ctrl->bRequestType; u8 b_request = ctrl->bRequest; u16 w_index = le16_to_cpu(ctrl->wIndex); u16 w_value = le16_to_cpu(ctrl->wValue); u16 w_length = le16_to_cpu(ctrl->wLength); + unsigned long flags; /* printk(KERN_INFO "acc_ctrlrequest " @@ -583,7 +791,7 @@ static int acc_ctrlrequest(struct usb_composite_dev *cdev, if (b_request == ACCESSORY_START) { dev->start_requested = 1; schedule_delayed_work( - &dev->work, msecs_to_jiffies(10)); + &dev->start_work, msecs_to_jiffies(10)); value = 0; } else if (b_request == ACCESSORY_SEND_STRING) { dev->string_index = w_index; @@ -594,6 +802,38 @@ static int acc_ctrlrequest(struct usb_composite_dev *cdev, w_index == 0 && w_length == 0) { dev->audio_mode = w_value; value = 0; + } else if (b_request == ACCESSORY_REGISTER_HID) { + value = acc_register_hid(dev, w_value, w_index); + } else if (b_request == ACCESSORY_UNREGISTER_HID) { + value = acc_unregister_hid(dev, w_value); + } else if (b_request == ACCESSORY_SET_HID_REPORT_DESC) { + spin_lock_irqsave(&dev->lock, flags); + hid = acc_hid_get(&dev->new_hid_list, w_value); + spin_unlock_irqrestore(&dev->lock, flags); + if (!hid) { + value = -EINVAL; + goto err; + } + offset = w_index; + if (offset != hid->report_desc_offset + || offset + w_length > hid->report_desc_len) { + value = -EINVAL; + goto err; + } + cdev->req->context = hid; + cdev->req->complete = acc_complete_set_hid_report_desc; + value = w_length; + } else if (b_request == ACCESSORY_SEND_HID_EVENT) { + spin_lock_irqsave(&dev->lock, flags); + hid = acc_hid_get(&dev->hid_list, w_value); + spin_unlock_irqrestore(&dev->lock, flags); + if (!hid) { + value = -EINVAL; + goto err; + } + cdev->req->context = hid; + cdev->req->complete = acc_complete_send_hid_event; + value = w_length; } } else if (b_requestType == (USB_DIR_IN | USB_TYPE_VENDOR)) { if (b_request == ACCESSORY_GET_PROTOCOL) { @@ -621,6 +861,7 @@ static int acc_ctrlrequest(struct usb_composite_dev *cdev, __func__); } +err: if (value == -EOPNOTSUPP) VDBG(cdev, "unknown class-specific control req " @@ -640,6 +881,10 @@ acc_function_bind(struct usb_configuration *c, struct usb_function *f) DBG(cdev, "acc_function_bind dev: %p\n", dev); + ret = hid_register_driver(&acc_hid_driver); + if (ret) + return ret; + dev->start_requested = 0; /* allocate interface ID(s) */ @@ -668,6 +913,36 @@ acc_function_bind(struct usb_configuration *c, struct usb_function *f) return 0; } +static void +kill_all_hid_devices(struct acc_dev *dev) +{ + struct acc_hid_dev *hid; + struct list_head *entry, *temp; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + list_for_each_safe(entry, temp, &dev->hid_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + list_del(&hid->list); + list_add(&hid->list, &dev->dead_hid_list); + } + list_for_each_safe(entry, temp, &dev->new_hid_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + list_del(&hid->list); + list_add(&hid->list, &dev->dead_hid_list); + } + spin_unlock_irqrestore(&dev->lock, flags); + + schedule_work(&dev->hid_work); +} + +static void +acc_hid_unbind(struct acc_dev *dev) +{ + hid_unregister_driver(&acc_hid_driver); + kill_all_hid_devices(dev); +} + static void acc_function_unbind(struct usb_configuration *c, struct usb_function *f) { @@ -679,14 +954,104 @@ acc_function_unbind(struct usb_configuration *c, struct usb_function *f) acc_request_free(req, dev->ep_in); for (i = 0; i < RX_REQ_MAX; i++) acc_request_free(dev->rx_req[i], dev->ep_out); + + acc_hid_unbind(dev); } -static void acc_work(struct work_struct *data) +static void acc_start_work(struct work_struct *data) { char *envp[2] = { "ACCESSORY=START", NULL }; kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp); } +static int acc_hid_init(struct acc_hid_dev *hdev) +{ + struct hid_device *hid; + int ret; + + hid = hid_allocate_device(); + if (IS_ERR(hid)) + return PTR_ERR(hid); + + hid->ll_driver = &acc_hid_ll_driver; + hid->dev.parent = acc_device.this_device; + + hid->bus = BUS_USB; + hid->vendor = HID_ANY_ID; + hid->product = HID_ANY_ID; + hid->driver_data = hdev; + ret = hid_add_device(hid); + if (ret) { + pr_err("can't add hid device: %d\n", ret); + hid_destroy_device(hid); + return ret; + } + + hdev->hid = hid; + return 0; +} + +static void acc_hid_delete(struct acc_hid_dev *hid) +{ + kfree(hid->report_desc); + kfree(hid); +} + +static void acc_hid_work(struct work_struct *data) +{ + struct acc_dev *dev = _acc_dev; + struct list_head *entry, *temp; + struct acc_hid_dev *hid; + struct list_head new_list, dead_list; + unsigned long flags; + + INIT_LIST_HEAD(&new_list); + + spin_lock_irqsave(&dev->lock, flags); + + /* copy hids that are ready for initialization to new_list */ + list_for_each_safe(entry, temp, &dev->new_hid_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + if (hid->report_desc_offset == hid->report_desc_len) + list_move(&hid->list, &new_list); + } + + if (list_empty(&dev->dead_hid_list)) { + INIT_LIST_HEAD(&dead_list); + } else { + /* move all of dev->dead_hid_list to dead_list */ + dead_list.prev = dev->dead_hid_list.prev; + dead_list.next = dev->dead_hid_list.next; + dead_list.next->prev = &dead_list; + dead_list.prev->next = &dead_list; + INIT_LIST_HEAD(&dev->dead_hid_list); + } + + spin_unlock_irqrestore(&dev->lock, flags); + + /* register new HID devices */ + list_for_each_safe(entry, temp, &new_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + if (acc_hid_init(hid)) { + pr_err("can't add HID device %p\n", hid); + acc_hid_delete(hid); + } else { + spin_lock_irqsave(&dev->lock, flags); + list_move(&hid->list, &dev->hid_list); + spin_unlock_irqrestore(&dev->lock, flags); + } + } + + /* remove dead HID devices */ + list_for_each_safe(entry, temp, &dead_list) { + hid = list_entry(entry, struct acc_hid_dev, list); + list_del(&hid->list); + if (hid->hid) + hid_destroy_device(hid->hid); + acc_hid_delete(hid); + } +} + static int acc_function_set_alt(struct usb_function *f, unsigned intf, unsigned alt) { @@ -780,7 +1145,11 @@ static int acc_setup(void) init_waitqueue_head(&dev->write_wq); atomic_set(&dev->open_excl, 0); INIT_LIST_HEAD(&dev->tx_idle); - INIT_DELAYED_WORK(&dev->work, acc_work); + INIT_LIST_HEAD(&dev->hid_list); + INIT_LIST_HEAD(&dev->new_hid_list); + INIT_LIST_HEAD(&dev->dead_hid_list); + INIT_DELAYED_WORK(&dev->start_work, acc_start_work); + INIT_WORK(&dev->hid_work, acc_hid_work); /* _acc_dev must be set before calling usb_gadget_register_driver */ _acc_dev = dev; @@ -793,10 +1162,16 @@ static int acc_setup(void) err: kfree(dev); - printk(KERN_ERR "USB accessory gadget driver failed to initialize\n"); + pr_err("USB accessory gadget driver failed to initialize\n"); return ret; } +static void acc_disconnect(void) +{ + /* unregister all HID devices if USB is disconnected */ + kill_all_hid_devices(_acc_dev); +} + static void acc_cleanup(void) { misc_deregister(&acc_device); diff --git a/include/linux/usb/f_accessory.h b/include/linux/usb/f_accessory.h index ddb2fd0e88fb..61ebe0aabc5b 100644 --- a/include/linux/usb/f_accessory.h +++ b/include/linux/usb/f_accessory.h @@ -44,7 +44,7 @@ * index: 0 * data version number (16 bits little endian) * 1 for original accessory support - * 2 adds device to host audio support + * 2 adds HID and device to host audio support */ #define ACCESSORY_GET_PROTOCOL 51 @@ -72,6 +72,54 @@ */ #define ACCESSORY_START 53 +/* Control request for registering a HID device. + * Upon registering, a unique ID is sent by the accessory in the + * value parameter. This ID will be used for future commands for + * the device + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_REGISTER_HID_DEVICE + * value: Accessory assigned ID for the HID device + * index: total length of the HID report descriptor + * data none + */ +#define ACCESSORY_REGISTER_HID 54 + +/* Control request for unregistering a HID device. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_REGISTER_HID + * value: Accessory assigned ID for the HID device + * index: 0 + * data none + */ +#define ACCESSORY_UNREGISTER_HID 55 + +/* Control request for sending the HID report descriptor. + * If the HID descriptor is longer than the endpoint zero max packet size, + * the descriptor will be sent in multiple ACCESSORY_SET_HID_REPORT_DESC + * commands. The data for the descriptor must be sent sequentially + * if multiple packets are needed. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SET_HID_REPORT_DESC + * value: Accessory assigned ID for the HID device + * index: offset of data in descriptor + * (needed when HID descriptor is too big for one packet) + * data the HID report descriptor + */ +#define ACCESSORY_SET_HID_REPORT_DESC 56 + +/* Control request for sending HID events. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SEND_HID_EVENT + * value: Accessory assigned ID for the HID device + * index: 0 + * data the HID report for the event + */ +#define ACCESSORY_SEND_HID_EVENT 57 + /* Control request for setting the audio mode. * * requestType: USB_DIR_OUT | USB_TYPE_VENDOR -- GitLab From 34a37f82d7a5698348732d0715bbad3f2d5f379d Mon Sep 17 00:00:00 2001 From: Mike Lockwood Date: Fri, 11 May 2012 09:01:08 -0700 Subject: [PATCH 0700/1442] ANDROID: USB: gadget: f_audio_source: New gadget driver for audio output This driver presents a standard USB audio class interface to the host and an ALSA PCM device to userspace Change-Id: If16b14a5ff27045f9cb2daaf1ae9195c5eeab7d0 Signed-off-by: Mike Lockwood --- drivers/usb/gadget/f_audio_source.c | 828 ++++++++++++++++++++++++++++ 1 file changed, 828 insertions(+) create mode 100644 drivers/usb/gadget/f_audio_source.c diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c new file mode 100644 index 000000000000..c757409edf94 --- /dev/null +++ b/drivers/usb/gadget/f_audio_source.c @@ -0,0 +1,828 @@ +/* + * Gadget Function Driver for USB audio source device + * + * Copyright (C) 2012 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include + +#define SAMPLE_RATE 44100 +#define FRAMES_PER_MSEC (SAMPLE_RATE / 1000) + +#define IN_EP_MAX_PACKET_SIZE 384 + +/* Number of requests to allocate */ +#define IN_EP_REQ_COUNT 4 + +#define AUDIO_AC_INTERFACE 0 +#define AUDIO_AS_INTERFACE 1 +#define AUDIO_NUM_INTERFACES 2 + +/* B.3.1 Standard AC Interface Descriptor */ +static struct usb_interface_descriptor ac_interface_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bNumEndpoints = 0, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, +}; + +DECLARE_UAC_AC_HEADER_DESCRIPTOR(2); + +#define UAC_DT_AC_HEADER_LENGTH UAC_DT_AC_HEADER_SIZE(AUDIO_NUM_INTERFACES) +/* 1 input terminal, 1 output terminal and 1 feature unit */ +#define UAC_DT_TOTAL_LENGTH (UAC_DT_AC_HEADER_LENGTH \ + + UAC_DT_INPUT_TERMINAL_SIZE + UAC_DT_OUTPUT_TERMINAL_SIZE \ + + UAC_DT_FEATURE_UNIT_SIZE(0)) +/* B.3.2 Class-Specific AC Interface Descriptor */ +static struct uac1_ac_header_descriptor_2 ac_header_desc = { + .bLength = UAC_DT_AC_HEADER_LENGTH, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_HEADER, + .bcdADC = __constant_cpu_to_le16(0x0100), + .wTotalLength = __constant_cpu_to_le16(UAC_DT_TOTAL_LENGTH), + .bInCollection = AUDIO_NUM_INTERFACES, + .baInterfaceNr = { + [0] = AUDIO_AC_INTERFACE, + [1] = AUDIO_AS_INTERFACE, + } +}; + +#define INPUT_TERMINAL_ID 1 +static struct uac_input_terminal_descriptor input_terminal_desc = { + .bLength = UAC_DT_INPUT_TERMINAL_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_INPUT_TERMINAL, + .bTerminalID = INPUT_TERMINAL_ID, + .wTerminalType = UAC_INPUT_TERMINAL_MICROPHONE, + .bAssocTerminal = 0, + .wChannelConfig = 0x3, +}; + +DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(0); + +#define FEATURE_UNIT_ID 2 +static struct uac_feature_unit_descriptor_0 feature_unit_desc = { + .bLength = UAC_DT_FEATURE_UNIT_SIZE(0), + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_FEATURE_UNIT, + .bUnitID = FEATURE_UNIT_ID, + .bSourceID = INPUT_TERMINAL_ID, + .bControlSize = 2, +}; + +#define OUTPUT_TERMINAL_ID 3 +static struct uac1_output_terminal_descriptor output_terminal_desc = { + .bLength = UAC_DT_OUTPUT_TERMINAL_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_OUTPUT_TERMINAL, + .bTerminalID = OUTPUT_TERMINAL_ID, + .wTerminalType = UAC_TERMINAL_STREAMING, + .bAssocTerminal = FEATURE_UNIT_ID, + .bSourceID = FEATURE_UNIT_ID, +}; + +/* B.4.1 Standard AS Interface Descriptor */ +static struct usb_interface_descriptor as_interface_alt_0_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bAlternateSetting = 0, + .bNumEndpoints = 0, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOSTREAMING, +}; + +static struct usb_interface_descriptor as_interface_alt_1_desc = { + .bLength = USB_DT_INTERFACE_SIZE, + .bDescriptorType = USB_DT_INTERFACE, + .bAlternateSetting = 1, + .bNumEndpoints = 1, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOSTREAMING, +}; + +/* B.4.2 Class-Specific AS Interface Descriptor */ +static struct uac1_as_header_descriptor as_header_desc = { + .bLength = UAC_DT_AS_HEADER_SIZE, + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_AS_GENERAL, + .bTerminalLink = INPUT_TERMINAL_ID, + .bDelay = 1, + .wFormatTag = UAC_FORMAT_TYPE_I_PCM, +}; + +DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(1); + +static struct uac_format_type_i_discrete_descriptor_1 as_type_i_desc = { + .bLength = UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1), + .bDescriptorType = USB_DT_CS_INTERFACE, + .bDescriptorSubtype = UAC_FORMAT_TYPE, + .bFormatType = UAC_FORMAT_TYPE_I, + .bSubframeSize = 2, + .bBitResolution = 16, + .bSamFreqType = 1, +}; + +/* Standard ISO IN Endpoint Descriptor for highspeed */ +static struct usb_endpoint_descriptor hs_as_in_ep_desc = { + .bLength = USB_DT_ENDPOINT_AUDIO_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_SYNC_SYNC + | USB_ENDPOINT_XFER_ISOC, + .wMaxPacketSize = __constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE), + .bInterval = 4, /* poll 1 per millisecond */ +}; + +/* Standard ISO IN Endpoint Descriptor for highspeed */ +static struct usb_endpoint_descriptor fs_as_in_ep_desc = { + .bLength = USB_DT_ENDPOINT_AUDIO_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_SYNC_SYNC + | USB_ENDPOINT_XFER_ISOC, + .wMaxPacketSize = __constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE), + .bInterval = 1, /* poll 1 per millisecond */ +}; + +/* Class-specific AS ISO OUT Endpoint Descriptor */ +static struct uac_iso_endpoint_descriptor as_iso_in_desc = { + .bLength = UAC_ISO_ENDPOINT_DESC_SIZE, + .bDescriptorType = USB_DT_CS_ENDPOINT, + .bDescriptorSubtype = UAC_EP_GENERAL, + .bmAttributes = 1, + .bLockDelayUnits = 1, + .wLockDelay = __constant_cpu_to_le16(1), +}; + +static struct usb_descriptor_header *hs_audio_desc[] = { + (struct usb_descriptor_header *)&ac_interface_desc, + (struct usb_descriptor_header *)&ac_header_desc, + + (struct usb_descriptor_header *)&input_terminal_desc, + (struct usb_descriptor_header *)&output_terminal_desc, + (struct usb_descriptor_header *)&feature_unit_desc, + + (struct usb_descriptor_header *)&as_interface_alt_0_desc, + (struct usb_descriptor_header *)&as_interface_alt_1_desc, + (struct usb_descriptor_header *)&as_header_desc, + + (struct usb_descriptor_header *)&as_type_i_desc, + + (struct usb_descriptor_header *)&hs_as_in_ep_desc, + (struct usb_descriptor_header *)&as_iso_in_desc, + NULL, +}; + +static struct usb_descriptor_header *fs_audio_desc[] = { + (struct usb_descriptor_header *)&ac_interface_desc, + (struct usb_descriptor_header *)&ac_header_desc, + + (struct usb_descriptor_header *)&input_terminal_desc, + (struct usb_descriptor_header *)&output_terminal_desc, + (struct usb_descriptor_header *)&feature_unit_desc, + + (struct usb_descriptor_header *)&as_interface_alt_0_desc, + (struct usb_descriptor_header *)&as_interface_alt_1_desc, + (struct usb_descriptor_header *)&as_header_desc, + + (struct usb_descriptor_header *)&as_type_i_desc, + + (struct usb_descriptor_header *)&fs_as_in_ep_desc, + (struct usb_descriptor_header *)&as_iso_in_desc, + NULL, +}; + +static struct snd_pcm_hardware audio_hw_info = { + .info = SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_BATCH | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_BLOCK_TRANSFER, + + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels_min = 2, + .channels_max = 2, + .rate_min = SAMPLE_RATE, + .rate_max = SAMPLE_RATE, + + .buffer_bytes_max = 1024 * 1024, + .period_bytes_min = 64, + .period_bytes_max = 512 * 1024, + .periods_min = 2, + .periods_max = 1024, +}; + +/*-------------------------------------------------------------------------*/ + +struct audio_source_config { + int card; + int device; +}; + +struct audio_dev { + struct usb_function func; + struct snd_card *card; + struct snd_pcm *pcm; + struct snd_pcm_substream *substream; + + struct list_head idle_reqs; + struct usb_ep *in_ep; + + spinlock_t lock; + + /* beginning, end and current position in our buffer */ + void *buffer_start; + void *buffer_end; + void *buffer_pos; + + /* byte size of a "period" */ + unsigned int period; + /* bytes sent since last call to snd_pcm_period_elapsed */ + unsigned int period_offset; + /* time we started playing */ + ktime_t start_time; + /* number of frames sent since start_time */ + s64 frames_sent; +}; + +static inline struct audio_dev *func_to_audio(struct usb_function *f) +{ + return container_of(f, struct audio_dev, func); +} + +/*-------------------------------------------------------------------------*/ + +static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size) +{ + struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) + return NULL; + + req->buf = kmalloc(buffer_size, GFP_KERNEL); + if (!req->buf) { + usb_ep_free_request(ep, req); + return NULL; + } + req->length = buffer_size; + return req; +} + +static void audio_request_free(struct usb_request *req, struct usb_ep *ep) +{ + if (req) { + kfree(req->buf); + usb_ep_free_request(ep, req); + } +} + +static void audio_req_put(struct audio_dev *audio, struct usb_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&audio->lock, flags); + list_add_tail(&req->list, &audio->idle_reqs); + spin_unlock_irqrestore(&audio->lock, flags); +} + +static struct usb_request *audio_req_get(struct audio_dev *audio) +{ + unsigned long flags; + struct usb_request *req; + + spin_lock_irqsave(&audio->lock, flags); + if (list_empty(&audio->idle_reqs)) { + req = 0; + } else { + req = list_first_entry(&audio->idle_reqs, struct usb_request, + list); + list_del(&req->list); + } + spin_unlock_irqrestore(&audio->lock, flags); + return req; +} + +/* send the appropriate number of packets to match our bitrate */ +static void audio_send(struct audio_dev *audio) +{ + struct snd_pcm_runtime *runtime; + struct usb_request *req; + int length, length1, length2, ret; + s64 msecs; + s64 frames; + ktime_t now; + + /* audio->substream will be null if we have been closed */ + if (!audio->substream) + return; + /* audio->buffer_pos will be null if we have been stopped */ + if (!audio->buffer_pos) + return; + + runtime = audio->substream->runtime; + + /* compute number of frames to send */ + now = ktime_get(); + msecs = ktime_to_ns(now) - ktime_to_ns(audio->start_time); + do_div(msecs, 1000000); + frames = msecs * SAMPLE_RATE; + do_div(frames, 1000); + + /* Readjust our frames_sent if we fall too far behind. + * If we get too far behind it is better to drop some frames than + * to keep sending data too fast in an attempt to catch up. + */ + if (frames - audio->frames_sent > 10 * FRAMES_PER_MSEC) + audio->frames_sent = frames - FRAMES_PER_MSEC; + + frames -= audio->frames_sent; + + /* We need to send something to keep the pipeline going */ + if (frames <= 0) + frames = FRAMES_PER_MSEC; + + while (frames > 0) { + req = audio_req_get(audio); + if (!req) + break; + + length = frames_to_bytes(runtime, frames); + if (length > IN_EP_MAX_PACKET_SIZE) + length = IN_EP_MAX_PACKET_SIZE; + + if (audio->buffer_pos + length > audio->buffer_end) + length1 = audio->buffer_end - audio->buffer_pos; + else + length1 = length; + memcpy(req->buf, audio->buffer_pos, length1); + if (length1 < length) { + /* Wrap around and copy remaining length + * at beginning of buffer. + */ + length2 = length - length1; + memcpy(req->buf + length1, audio->buffer_start, + length2); + audio->buffer_pos = audio->buffer_start + length2; + } else { + audio->buffer_pos += length1; + if (audio->buffer_pos >= audio->buffer_end) + audio->buffer_pos = audio->buffer_start; + } + + req->length = length; + ret = usb_ep_queue(audio->in_ep, req, GFP_ATOMIC); + if (ret < 0) { + pr_err("usb_ep_queue failed ret: %d\n", ret); + audio_req_put(audio, req); + break; + } + + frames -= bytes_to_frames(runtime, length); + audio->frames_sent += bytes_to_frames(runtime, length); + } +} + +static void audio_control_complete(struct usb_ep *ep, struct usb_request *req) +{ + /* nothing to do here */ +} + +static void audio_data_complete(struct usb_ep *ep, struct usb_request *req) +{ + struct audio_dev *audio = req->context; + + pr_debug("audio_data_complete req->status %d req->actual %d\n", + req->status, req->actual); + + audio_req_put(audio, req); + + if (!audio->buffer_start || req->status) + return; + + audio->period_offset += req->actual; + if (audio->period_offset >= audio->period) { + snd_pcm_period_elapsed(audio->substream); + audio->period_offset = 0; + } + audio_send(audio); +} + +static int audio_set_endpoint_req(struct usb_function *f, + const struct usb_ctrlrequest *ctrl) +{ + int value = -EOPNOTSUPP; + u16 ep = le16_to_cpu(ctrl->wIndex); + u16 len = le16_to_cpu(ctrl->wLength); + u16 w_value = le16_to_cpu(ctrl->wValue); + + pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n", + ctrl->bRequest, w_value, len, ep); + + switch (ctrl->bRequest) { + case UAC_SET_CUR: + case UAC_SET_MIN: + case UAC_SET_MAX: + case UAC_SET_RES: + value = len; + break; + default: + break; + } + + return value; +} + +static int audio_get_endpoint_req(struct usb_function *f, + const struct usb_ctrlrequest *ctrl) +{ + struct usb_composite_dev *cdev = f->config->cdev; + int value = -EOPNOTSUPP; + u8 ep = ((le16_to_cpu(ctrl->wIndex) >> 8) & 0xFF); + u16 len = le16_to_cpu(ctrl->wLength); + u16 w_value = le16_to_cpu(ctrl->wValue); + u8 *buf = cdev->req->buf; + + pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n", + ctrl->bRequest, w_value, len, ep); + + if (w_value == UAC_EP_CS_ATTR_SAMPLE_RATE << 8) { + switch (ctrl->bRequest) { + case UAC_GET_CUR: + case UAC_GET_MIN: + case UAC_GET_MAX: + case UAC_GET_RES: + /* return our sample rate */ + buf[0] = (u8)SAMPLE_RATE; + buf[1] = (u8)(SAMPLE_RATE >> 8); + buf[2] = (u8)(SAMPLE_RATE >> 16); + value = 3; + break; + default: + break; + } + } + + return value; +} + +static int +audio_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) +{ + struct usb_composite_dev *cdev = f->config->cdev; + struct usb_request *req = cdev->req; + int value = -EOPNOTSUPP; + u16 w_index = le16_to_cpu(ctrl->wIndex); + u16 w_value = le16_to_cpu(ctrl->wValue); + u16 w_length = le16_to_cpu(ctrl->wLength); + + /* composite driver infrastructure handles everything; interface + * activation uses set_alt(). + */ + switch (ctrl->bRequestType) { + case USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_ENDPOINT: + value = audio_set_endpoint_req(f, ctrl); + break; + + case USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT: + value = audio_get_endpoint_req(f, ctrl); + break; + } + + /* respond with data transfer or status phase? */ + if (value >= 0) { + pr_debug("audio req%02x.%02x v%04x i%04x l%d\n", + ctrl->bRequestType, ctrl->bRequest, + w_value, w_index, w_length); + req->zero = 0; + req->length = value; + req->complete = audio_control_complete; + value = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC); + if (value < 0) + pr_err("audio response on err %d\n", value); + } + + /* device either stalls (value < 0) or reports success */ + return value; +} + +static int audio_set_alt(struct usb_function *f, unsigned intf, unsigned alt) +{ + struct audio_dev *audio = func_to_audio(f); + struct usb_composite_dev *cdev = f->config->cdev; + int ret; + + pr_debug("audio_set_alt intf %d, alt %d\n", intf, alt); + + ret = config_ep_by_speed(cdev->gadget, f, audio->in_ep); + if (ret) + return ret; + + usb_ep_enable(audio->in_ep); + return 0; +} + +static void audio_disable(struct usb_function *f) +{ + struct audio_dev *audio = func_to_audio(f); + + pr_debug("audio_disable\n"); + usb_ep_disable(audio->in_ep); +} + +/*-------------------------------------------------------------------------*/ + +static void audio_build_desc(struct audio_dev *audio) +{ + u8 *sam_freq; + int rate; + + /* Set channel numbers */ + input_terminal_desc.bNrChannels = 2; + as_type_i_desc.bNrChannels = 2; + + /* Set sample rates */ + rate = SAMPLE_RATE; + sam_freq = as_type_i_desc.tSamFreq[0]; + memcpy(sam_freq, &rate, 3); +} + +/* audio function driver setup/binding */ +static int +audio_bind(struct usb_configuration *c, struct usb_function *f) +{ + struct usb_composite_dev *cdev = c->cdev; + struct audio_dev *audio = func_to_audio(f); + int status; + struct usb_ep *ep; + struct usb_request *req; + int i; + + audio_build_desc(audio); + + /* allocate instance-specific interface IDs, and patch descriptors */ + status = usb_interface_id(c, f); + if (status < 0) + goto fail; + ac_interface_desc.bInterfaceNumber = status; + + status = usb_interface_id(c, f); + if (status < 0) + goto fail; + as_interface_alt_0_desc.bInterfaceNumber = status; + as_interface_alt_1_desc.bInterfaceNumber = status; + + status = -ENODEV; + + /* allocate our endpoint */ + ep = usb_ep_autoconfig(cdev->gadget, &fs_as_in_ep_desc); + if (!ep) + goto fail; + audio->in_ep = ep; + ep->driver_data = audio; /* claim */ + + if (gadget_is_dualspeed(c->cdev->gadget)) + hs_as_in_ep_desc.bEndpointAddress = + fs_as_in_ep_desc.bEndpointAddress; + + f->descriptors = fs_audio_desc; + f->hs_descriptors = hs_audio_desc; + + for (i = 0, status = 0; i < IN_EP_REQ_COUNT && status == 0; i++) { + req = audio_request_new(ep, IN_EP_MAX_PACKET_SIZE); + if (req) { + req->context = audio; + req->complete = audio_data_complete; + audio_req_put(audio, req); + } else + status = -ENOMEM; + } + +fail: + return status; +} + +static void +audio_unbind(struct usb_configuration *c, struct usb_function *f) +{ + struct audio_dev *audio = func_to_audio(f); + struct usb_request *req; + + while ((req = audio_req_get(audio))) + audio_request_free(req, audio->in_ep); + + snd_card_free_when_closed(audio->card); + audio->card = NULL; + audio->pcm = NULL; + audio->substream = NULL; + audio->in_ep = NULL; +} + +static void audio_pcm_playback_start(struct audio_dev *audio) +{ + audio->start_time = ktime_get(); + audio->frames_sent = 0; + audio_send(audio); +} + +static void audio_pcm_playback_stop(struct audio_dev *audio) +{ + unsigned long flags; + + spin_lock_irqsave(&audio->lock, flags); + audio->buffer_start = 0; + audio->buffer_end = 0; + audio->buffer_pos = 0; + spin_unlock_irqrestore(&audio->lock, flags); +} + +static int audio_pcm_open(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct audio_dev *audio = substream->private_data; + + runtime->private_data = audio; + runtime->hw = audio_hw_info; + snd_pcm_limit_hw_rates(runtime); + runtime->hw.channels_max = 2; + + audio->substream = substream; + return 0; +} + +static int audio_pcm_close(struct snd_pcm_substream *substream) +{ + struct audio_dev *audio = substream->private_data; + unsigned long flags; + + spin_lock_irqsave(&audio->lock, flags); + audio->substream = NULL; + spin_unlock_irqrestore(&audio->lock, flags); + + return 0; +} + +static int audio_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + unsigned int channels = params_channels(params); + unsigned int rate = params_rate(params); + + if (rate != SAMPLE_RATE) + return -EINVAL; + if (channels != 2) + return -EINVAL; + + return snd_pcm_lib_alloc_vmalloc_buffer(substream, + params_buffer_bytes(params)); +} + +static int audio_pcm_hw_free(struct snd_pcm_substream *substream) +{ + return snd_pcm_lib_free_vmalloc_buffer(substream); +} + +static int audio_pcm_prepare(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct audio_dev *audio = runtime->private_data; + + audio->period = snd_pcm_lib_period_bytes(substream); + audio->period_offset = 0; + audio->buffer_start = runtime->dma_area; + audio->buffer_end = audio->buffer_start + + snd_pcm_lib_buffer_bytes(substream); + audio->buffer_pos = audio->buffer_start; + + return 0; +} + +static snd_pcm_uframes_t audio_pcm_pointer(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct audio_dev *audio = runtime->private_data; + ssize_t bytes = audio->buffer_pos - audio->buffer_start; + + /* return offset of next frame to fill in our buffer */ + return bytes_to_frames(runtime, bytes); +} + +static int audio_pcm_playback_trigger(struct snd_pcm_substream *substream, + int cmd) +{ + struct audio_dev *audio = substream->runtime->private_data; + int ret = 0; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + audio_pcm_playback_start(audio); + break; + + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + audio_pcm_playback_stop(audio); + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static struct audio_dev _audio_dev = { + .func = { + .name = "audio_source", + .bind = audio_bind, + .unbind = audio_unbind, + .set_alt = audio_set_alt, + .setup = audio_setup, + .disable = audio_disable, + }, + .lock = __SPIN_LOCK_UNLOCKED(_audio_dev.lock), + .idle_reqs = LIST_HEAD_INIT(_audio_dev.idle_reqs), +}; + +static struct snd_pcm_ops audio_playback_ops = { + .open = audio_pcm_open, + .close = audio_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = audio_pcm_hw_params, + .hw_free = audio_pcm_hw_free, + .prepare = audio_pcm_prepare, + .trigger = audio_pcm_playback_trigger, + .pointer = audio_pcm_pointer, +}; + +int audio_source_bind_config(struct usb_configuration *c, + struct audio_source_config *config) +{ + struct audio_dev *audio; + struct snd_card *card; + struct snd_pcm *pcm; + int err; + + config->card = -1; + config->device = -1; + + audio = &_audio_dev; + + err = snd_card_create(SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, + THIS_MODULE, 0, &card); + if (err) + return err; + + snd_card_set_dev(card, &c->cdev->gadget->dev); + + err = snd_pcm_new(card, "USB audio source", 0, 1, 0, &pcm); + if (err) + goto pcm_fail; + pcm->private_data = audio; + pcm->info_flags = 0; + audio->pcm = pcm; + + strlcpy(pcm->name, "USB gadget audio", sizeof(pcm->name)); + + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &audio_playback_ops); + snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, + NULL, 0, 64 * 1024); + + strlcpy(card->driver, "audio_source", sizeof(card->driver)); + strlcpy(card->shortname, card->driver, sizeof(card->shortname)); + strlcpy(card->longname, "USB accessory audio source", + sizeof(card->longname)); + + err = snd_card_register(card); + if (err) + goto register_fail; + + err = usb_add_function(c, &audio->func); + if (err) + goto add_fail; + + config->card = pcm->card->number; + config->device = pcm->device; + audio->card = card; + return 0; + +add_fail: +register_fail: +pcm_fail: + snd_card_free(audio->card); + return err; +} -- GitLab From 4ce5e656ae741e5790bdbb8d9dac1cd831c81346 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Wed, 28 Nov 2012 13:03:40 -0800 Subject: [PATCH 0701/1442] ANDROID: usb: gadget: accessory: Fix section mismatch (again) create_bulk_endpoints should not be __init since it is called when accessory is enabled. Change-Id: Iac6e9f29d53c93760e926efd8e7603432632acb4 Signed-off-by: Todd Poynor --- drivers/usb/gadget/f_accessory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 921db5a193d6..a244265c1143 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -482,7 +482,7 @@ static int acc_unregister_hid(struct acc_dev *dev, int id) return 0; } -static int __init create_bulk_endpoints(struct acc_dev *dev, +static int create_bulk_endpoints(struct acc_dev *dev, struct usb_endpoint_descriptor *in_desc, struct usb_endpoint_descriptor *out_desc) { -- GitLab From 73693455d2eb093426885ff0307f0ad8d17cd1ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 26 Nov 2012 16:09:13 -0800 Subject: [PATCH 0702/1442] ANDROID: usb: otg: otg-wakelock: Fix build for 3.7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arve Hjønnevåg --- drivers/usb/phy/otg-wakelock.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c index e17e27299062..479376bfa484 100644 --- a/drivers/usb/phy/otg-wakelock.c +++ b/drivers/usb/phy/otg-wakelock.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -138,13 +139,15 @@ MODULE_PARM_DESC(enabled, "enable wakelock when VBUS present"); static int __init otg_wakelock_init(void) { int ret; + struct usb_phy *phy; - otgwl_xceiv = usb_get_transceiver(); + phy = usb_get_phy(USB_PHY_TYPE_USB2); - if (!otgwl_xceiv) { + if (IS_ERR(phy)) { pr_err("%s: No USB transceiver found\n", __func__); - return -ENODEV; + return PTR_ERR(phy); } + otgwl_xceiv = phy; snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s", dev_name(otgwl_xceiv->dev)); -- GitLab From e2754393100933e51315a0624d393aa2fa48498a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Tue, 27 Nov 2012 19:29:04 -0800 Subject: [PATCH 0703/1442] ANDROID: usb: gadget: Fixes and hacks to make android usb gadget compile on 3.8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I332a6802dbd49b4018b9318b8621d26ed94c955d Signed-off-by: Arve Hjønnevåg --- drivers/usb/gadget/f_accessory.c | 2 +- drivers/usb/gadget/f_adb.c | 2 +- drivers/usb/gadget/f_audio_source.c | 2 +- drivers/usb/gadget/f_mtp.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index a244265c1143..092964c2b506 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -1121,7 +1121,7 @@ static int acc_bind_config(struct usb_configuration *c) dev->cdev = c->cdev; dev->function.name = "accessory"; dev->function.strings = acc_strings, - dev->function.descriptors = fs_acc_descs; + dev->function.fs_descriptors = fs_acc_descs; dev->function.hs_descriptors = hs_acc_descs; dev->function.bind = acc_function_bind; dev->function.unbind = acc_function_unbind; diff --git a/drivers/usb/gadget/f_adb.c b/drivers/usb/gadget/f_adb.c index 1629ffb5b979..a1d70d276953 100644 --- a/drivers/usb/gadget/f_adb.c +++ b/drivers/usb/gadget/f_adb.c @@ -566,7 +566,7 @@ static int adb_bind_config(struct usb_configuration *c) dev->cdev = c->cdev; dev->function.name = "adb"; - dev->function.descriptors = fs_adb_descs; + dev->function.fs_descriptors = fs_adb_descs; dev->function.hs_descriptors = hs_adb_descs; dev->function.bind = adb_function_bind; dev->function.unbind = adb_function_unbind; diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c index c757409edf94..56dcf217cfe5 100644 --- a/drivers/usb/gadget/f_audio_source.c +++ b/drivers/usb/gadget/f_audio_source.c @@ -599,7 +599,7 @@ audio_bind(struct usb_configuration *c, struct usb_function *f) hs_as_in_ep_desc.bEndpointAddress = fs_as_in_ep_desc.bEndpointAddress; - f->descriptors = fs_audio_desc; + f->fs_descriptors = fs_audio_desc; f->hs_descriptors = hs_audio_desc; for (i = 0, status = 0; i < IN_EP_REQ_COUNT && status == 0; i++) { diff --git a/drivers/usb/gadget/f_mtp.c b/drivers/usb/gadget/f_mtp.c index 1638977a5410..9ab94697c196 100644 --- a/drivers/usb/gadget/f_mtp.c +++ b/drivers/usb/gadget/f_mtp.c @@ -1212,10 +1212,10 @@ static int mtp_bind_config(struct usb_configuration *c, bool ptp_config) dev->function.name = "mtp"; dev->function.strings = mtp_strings; if (ptp_config) { - dev->function.descriptors = fs_ptp_descs; + dev->function.fs_descriptors = fs_ptp_descs; dev->function.hs_descriptors = hs_ptp_descs; } else { - dev->function.descriptors = fs_mtp_descs; + dev->function.fs_descriptors = fs_mtp_descs; dev->function.hs_descriptors = hs_mtp_descs; } dev->function.bind = mtp_function_bind; -- GitLab From 47aabb9567a758d45ce32720606b85cd446dad91 Mon Sep 17 00:00:00 2001 From: Benoit Goby Date: Mon, 5 Nov 2012 18:47:08 -0800 Subject: [PATCH 0704/1442] ANDROID: usb: gadget: Fix android gadget driver build Removed obsolete f_adb function Change-Id: Idfb4110429bc0ea63f493c68ad667f49ca471987 Signed-off-by: Benoit Goby --- drivers/usb/gadget/f_adb.c | 619 ------------------------------------- 1 file changed, 619 deletions(-) delete mode 100644 drivers/usb/gadget/f_adb.c diff --git a/drivers/usb/gadget/f_adb.c b/drivers/usb/gadget/f_adb.c deleted file mode 100644 index a1d70d276953..000000000000 --- a/drivers/usb/gadget/f_adb.c +++ /dev/null @@ -1,619 +0,0 @@ -/* - * Gadget Driver for Android ADB - * - * Copyright (C) 2008 Google, Inc. - * Author: Mike Lockwood - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define ADB_BULK_BUFFER_SIZE 4096 - -/* number of tx requests to allocate */ -#define TX_REQ_MAX 4 - -static const char adb_shortname[] = "android_adb"; - -struct adb_dev { - struct usb_function function; - struct usb_composite_dev *cdev; - spinlock_t lock; - - struct usb_ep *ep_in; - struct usb_ep *ep_out; - - int online; - int error; - - atomic_t read_excl; - atomic_t write_excl; - atomic_t open_excl; - - struct list_head tx_idle; - - wait_queue_head_t read_wq; - wait_queue_head_t write_wq; - struct usb_request *rx_req; - int rx_done; -}; - -static struct usb_interface_descriptor adb_interface_desc = { - .bLength = USB_DT_INTERFACE_SIZE, - .bDescriptorType = USB_DT_INTERFACE, - .bInterfaceNumber = 0, - .bNumEndpoints = 2, - .bInterfaceClass = 0xFF, - .bInterfaceSubClass = 0x42, - .bInterfaceProtocol = 1, -}; - -static struct usb_endpoint_descriptor adb_highspeed_in_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - .bEndpointAddress = USB_DIR_IN, - .bmAttributes = USB_ENDPOINT_XFER_BULK, - .wMaxPacketSize = __constant_cpu_to_le16(512), -}; - -static struct usb_endpoint_descriptor adb_highspeed_out_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - .bEndpointAddress = USB_DIR_OUT, - .bmAttributes = USB_ENDPOINT_XFER_BULK, - .wMaxPacketSize = __constant_cpu_to_le16(512), -}; - -static struct usb_endpoint_descriptor adb_fullspeed_in_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - .bEndpointAddress = USB_DIR_IN, - .bmAttributes = USB_ENDPOINT_XFER_BULK, -}; - -static struct usb_endpoint_descriptor adb_fullspeed_out_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - .bEndpointAddress = USB_DIR_OUT, - .bmAttributes = USB_ENDPOINT_XFER_BULK, -}; - -static struct usb_descriptor_header *fs_adb_descs[] = { - (struct usb_descriptor_header *) &adb_interface_desc, - (struct usb_descriptor_header *) &adb_fullspeed_in_desc, - (struct usb_descriptor_header *) &adb_fullspeed_out_desc, - NULL, -}; - -static struct usb_descriptor_header *hs_adb_descs[] = { - (struct usb_descriptor_header *) &adb_interface_desc, - (struct usb_descriptor_header *) &adb_highspeed_in_desc, - (struct usb_descriptor_header *) &adb_highspeed_out_desc, - NULL, -}; - -static void adb_ready_callback(void); -static void adb_closed_callback(void); - -/* temporary variable used between adb_open() and adb_gadget_bind() */ -static struct adb_dev *_adb_dev; - -static inline struct adb_dev *func_to_adb(struct usb_function *f) -{ - return container_of(f, struct adb_dev, function); -} - - -static struct usb_request *adb_request_new(struct usb_ep *ep, int buffer_size) -{ - struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); - if (!req) - return NULL; - - /* now allocate buffers for the requests */ - req->buf = kmalloc(buffer_size, GFP_KERNEL); - if (!req->buf) { - usb_ep_free_request(ep, req); - return NULL; - } - - return req; -} - -static void adb_request_free(struct usb_request *req, struct usb_ep *ep) -{ - if (req) { - kfree(req->buf); - usb_ep_free_request(ep, req); - } -} - -static inline int adb_lock(atomic_t *excl) -{ - if (atomic_inc_return(excl) == 1) { - return 0; - } else { - atomic_dec(excl); - return -1; - } -} - -static inline void adb_unlock(atomic_t *excl) -{ - atomic_dec(excl); -} - -/* add a request to the tail of a list */ -void adb_req_put(struct adb_dev *dev, struct list_head *head, - struct usb_request *req) -{ - unsigned long flags; - - spin_lock_irqsave(&dev->lock, flags); - list_add_tail(&req->list, head); - spin_unlock_irqrestore(&dev->lock, flags); -} - -/* remove a request from the head of a list */ -struct usb_request *adb_req_get(struct adb_dev *dev, struct list_head *head) -{ - unsigned long flags; - struct usb_request *req; - - spin_lock_irqsave(&dev->lock, flags); - if (list_empty(head)) { - req = 0; - } else { - req = list_first_entry(head, struct usb_request, list); - list_del(&req->list); - } - spin_unlock_irqrestore(&dev->lock, flags); - return req; -} - -static void adb_complete_in(struct usb_ep *ep, struct usb_request *req) -{ - struct adb_dev *dev = _adb_dev; - - if (req->status != 0) - dev->error = 1; - - adb_req_put(dev, &dev->tx_idle, req); - - wake_up(&dev->write_wq); -} - -static void adb_complete_out(struct usb_ep *ep, struct usb_request *req) -{ - struct adb_dev *dev = _adb_dev; - - dev->rx_done = 1; - if (req->status != 0 && req->status != -ECONNRESET) - dev->error = 1; - - wake_up(&dev->read_wq); -} - -static int adb_create_bulk_endpoints(struct adb_dev *dev, - struct usb_endpoint_descriptor *in_desc, - struct usb_endpoint_descriptor *out_desc) -{ - struct usb_composite_dev *cdev = dev->cdev; - struct usb_request *req; - struct usb_ep *ep; - int i; - - DBG(cdev, "create_bulk_endpoints dev: %p\n", dev); - - ep = usb_ep_autoconfig(cdev->gadget, in_desc); - if (!ep) { - DBG(cdev, "usb_ep_autoconfig for ep_in failed\n"); - return -ENODEV; - } - DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name); - ep->driver_data = dev; /* claim the endpoint */ - dev->ep_in = ep; - - ep = usb_ep_autoconfig(cdev->gadget, out_desc); - if (!ep) { - DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); - return -ENODEV; - } - DBG(cdev, "usb_ep_autoconfig for adb ep_out got %s\n", ep->name); - ep->driver_data = dev; /* claim the endpoint */ - dev->ep_out = ep; - - /* now allocate requests for our endpoints */ - req = adb_request_new(dev->ep_out, ADB_BULK_BUFFER_SIZE); - if (!req) - goto fail; - req->complete = adb_complete_out; - dev->rx_req = req; - - for (i = 0; i < TX_REQ_MAX; i++) { - req = adb_request_new(dev->ep_in, ADB_BULK_BUFFER_SIZE); - if (!req) - goto fail; - req->complete = adb_complete_in; - adb_req_put(dev, &dev->tx_idle, req); - } - - return 0; - -fail: - printk(KERN_ERR "adb_bind() could not allocate requests\n"); - return -1; -} - -static ssize_t adb_read(struct file *fp, char __user *buf, - size_t count, loff_t *pos) -{ - struct adb_dev *dev = fp->private_data; - struct usb_request *req; - int r = count, xfer; - int ret; - - pr_debug("adb_read(%d)\n", count); - if (!_adb_dev) - return -ENODEV; - - if (count > ADB_BULK_BUFFER_SIZE) - return -EINVAL; - - if (adb_lock(&dev->read_excl)) - return -EBUSY; - - /* we will block until we're online */ - while (!(dev->online || dev->error)) { - pr_debug("adb_read: waiting for online state\n"); - ret = wait_event_interruptible(dev->read_wq, - (dev->online || dev->error)); - if (ret < 0) { - adb_unlock(&dev->read_excl); - return ret; - } - } - if (dev->error) { - r = -EIO; - goto done; - } - -requeue_req: - /* queue a request */ - req = dev->rx_req; - req->length = count; - dev->rx_done = 0; - ret = usb_ep_queue(dev->ep_out, req, GFP_ATOMIC); - if (ret < 0) { - pr_debug("adb_read: failed to queue req %p (%d)\n", req, ret); - r = -EIO; - dev->error = 1; - goto done; - } else { - pr_debug("rx %p queue\n", req); - } - - /* wait for a request to complete */ - ret = wait_event_interruptible(dev->read_wq, dev->rx_done); - if (ret < 0) { - if (ret != -ERESTARTSYS) - dev->error = 1; - r = ret; - usb_ep_dequeue(dev->ep_out, req); - goto done; - } - if (!dev->error) { - /* If we got a 0-len packet, throw it back and try again. */ - if (req->actual == 0) - goto requeue_req; - - pr_debug("rx %p %d\n", req, req->actual); - xfer = (req->actual < count) ? req->actual : count; - if (copy_to_user(buf, req->buf, xfer)) - r = -EFAULT; - - } else - r = -EIO; - -done: - adb_unlock(&dev->read_excl); - pr_debug("adb_read returning %d\n", r); - return r; -} - -static ssize_t adb_write(struct file *fp, const char __user *buf, - size_t count, loff_t *pos) -{ - struct adb_dev *dev = fp->private_data; - struct usb_request *req = 0; - int r = count, xfer; - int ret; - - if (!_adb_dev) - return -ENODEV; - pr_debug("adb_write(%d)\n", count); - - if (adb_lock(&dev->write_excl)) - return -EBUSY; - - while (count > 0) { - if (dev->error) { - pr_debug("adb_write dev->error\n"); - r = -EIO; - break; - } - - /* get an idle tx request to use */ - req = 0; - ret = wait_event_interruptible(dev->write_wq, - (req = adb_req_get(dev, &dev->tx_idle)) || dev->error); - - if (ret < 0) { - r = ret; - break; - } - - if (req != 0) { - if (count > ADB_BULK_BUFFER_SIZE) - xfer = ADB_BULK_BUFFER_SIZE; - else - xfer = count; - if (copy_from_user(req->buf, buf, xfer)) { - r = -EFAULT; - break; - } - - req->length = xfer; - ret = usb_ep_queue(dev->ep_in, req, GFP_ATOMIC); - if (ret < 0) { - pr_debug("adb_write: xfer error %d\n", ret); - dev->error = 1; - r = -EIO; - break; - } - - buf += xfer; - count -= xfer; - - /* zero this so we don't try to free it on error exit */ - req = 0; - } - } - - if (req) - adb_req_put(dev, &dev->tx_idle, req); - - adb_unlock(&dev->write_excl); - pr_debug("adb_write returning %d\n", r); - return r; -} - -static int adb_open(struct inode *ip, struct file *fp) -{ - pr_info("adb_open\n"); - if (!_adb_dev) - return -ENODEV; - - if (adb_lock(&_adb_dev->open_excl)) - return -EBUSY; - - fp->private_data = _adb_dev; - - /* clear the error latch */ - _adb_dev->error = 0; - - adb_ready_callback(); - - return 0; -} - -static int adb_release(struct inode *ip, struct file *fp) -{ - pr_info("adb_release\n"); - - adb_closed_callback(); - - adb_unlock(&_adb_dev->open_excl); - return 0; -} - -/* file operations for ADB device /dev/android_adb */ -static const struct file_operations adb_fops = { - .owner = THIS_MODULE, - .read = adb_read, - .write = adb_write, - .open = adb_open, - .release = adb_release, -}; - -static struct miscdevice adb_device = { - .minor = MISC_DYNAMIC_MINOR, - .name = adb_shortname, - .fops = &adb_fops, -}; - - - - -static int -adb_function_bind(struct usb_configuration *c, struct usb_function *f) -{ - struct usb_composite_dev *cdev = c->cdev; - struct adb_dev *dev = func_to_adb(f); - int id; - int ret; - - dev->cdev = cdev; - DBG(cdev, "adb_function_bind dev: %p\n", dev); - - /* allocate interface ID(s) */ - id = usb_interface_id(c, f); - if (id < 0) - return id; - adb_interface_desc.bInterfaceNumber = id; - - /* allocate endpoints */ - ret = adb_create_bulk_endpoints(dev, &adb_fullspeed_in_desc, - &adb_fullspeed_out_desc); - if (ret) - return ret; - - /* support high speed hardware */ - if (gadget_is_dualspeed(c->cdev->gadget)) { - adb_highspeed_in_desc.bEndpointAddress = - adb_fullspeed_in_desc.bEndpointAddress; - adb_highspeed_out_desc.bEndpointAddress = - adb_fullspeed_out_desc.bEndpointAddress; - } - - DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n", - gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full", - f->name, dev->ep_in->name, dev->ep_out->name); - return 0; -} - -static void -adb_function_unbind(struct usb_configuration *c, struct usb_function *f) -{ - struct adb_dev *dev = func_to_adb(f); - struct usb_request *req; - - - dev->online = 0; - dev->error = 1; - - wake_up(&dev->read_wq); - - adb_request_free(dev->rx_req, dev->ep_out); - while ((req = adb_req_get(dev, &dev->tx_idle))) - adb_request_free(req, dev->ep_in); -} - -static int adb_function_set_alt(struct usb_function *f, - unsigned intf, unsigned alt) -{ - struct adb_dev *dev = func_to_adb(f); - struct usb_composite_dev *cdev = f->config->cdev; - int ret; - - DBG(cdev, "adb_function_set_alt intf: %d alt: %d\n", intf, alt); - - ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in); - if (ret) - return ret; - - ret = usb_ep_enable(dev->ep_in); - if (ret) - return ret; - - ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out); - if (ret) - return ret; - - ret = usb_ep_enable(dev->ep_out); - if (ret) { - usb_ep_disable(dev->ep_in); - return ret; - } - dev->online = 1; - - /* readers may be blocked waiting for us to go online */ - wake_up(&dev->read_wq); - return 0; -} - -static void adb_function_disable(struct usb_function *f) -{ - struct adb_dev *dev = func_to_adb(f); - struct usb_composite_dev *cdev = dev->cdev; - - DBG(cdev, "adb_function_disable cdev %p\n", cdev); - dev->online = 0; - dev->error = 1; - usb_ep_disable(dev->ep_in); - usb_ep_disable(dev->ep_out); - - /* readers may be blocked waiting for us to go online */ - wake_up(&dev->read_wq); - - VDBG(cdev, "%s disabled\n", dev->function.name); -} - -static int adb_bind_config(struct usb_configuration *c) -{ - struct adb_dev *dev = _adb_dev; - - printk(KERN_INFO "adb_bind_config\n"); - - dev->cdev = c->cdev; - dev->function.name = "adb"; - dev->function.fs_descriptors = fs_adb_descs; - dev->function.hs_descriptors = hs_adb_descs; - dev->function.bind = adb_function_bind; - dev->function.unbind = adb_function_unbind; - dev->function.set_alt = adb_function_set_alt; - dev->function.disable = adb_function_disable; - - return usb_add_function(c, &dev->function); -} - -static int adb_setup(void) -{ - struct adb_dev *dev; - int ret; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return -ENOMEM; - - spin_lock_init(&dev->lock); - - init_waitqueue_head(&dev->read_wq); - init_waitqueue_head(&dev->write_wq); - - atomic_set(&dev->open_excl, 0); - atomic_set(&dev->read_excl, 0); - atomic_set(&dev->write_excl, 0); - - INIT_LIST_HEAD(&dev->tx_idle); - - _adb_dev = dev; - - ret = misc_register(&adb_device); - if (ret) - goto err; - - return 0; - -err: - kfree(dev); - printk(KERN_ERR "adb gadget driver failed to initialize\n"); - return ret; -} - -static void adb_cleanup(void) -{ - misc_deregister(&adb_device); - - kfree(_adb_dev); - _adb_dev = NULL; -} -- GitLab From 27dfc282ab2536b3023729eca0780d38e863fcef Mon Sep 17 00:00:00 2001 From: Peter Oh Date: Thu, 12 Sep 2013 01:42:18 +0000 Subject: [PATCH 0705/1442] ANDROID: USB: remove duplicate out endpoint creation in MTP mode Android MTP gadget uses 3 endpoints which are 1 in endpoint, 1 out endpoint, and 1 interrupt endpoint. However when MTP gadget creates its endpoints, it creates the out endpoint twice and overwrites the first created out endpoint with the second one, so that it causes a leak of endpoint resources. Change-Id: Iba82950095610b26b362f4b10a67cedfb1fee366 Signed-off-by: Peter Oh Reviewed-on: http://mps-gerrit.broadcom.com/37744 Reviewed-by: Graham Williams Reviewed-by: John Garry Branch-Open: Branch Status Reviewed-by: Checkpatch Status Reviewed-by: Joyjit Nath Tested-by: AutoSubmit Status --- drivers/usb/gadget/f_mtp.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/usb/gadget/f_mtp.c b/drivers/usb/gadget/f_mtp.c index 9ab94697c196..12fb818ab147 100644 --- a/drivers/usb/gadget/f_mtp.c +++ b/drivers/usb/gadget/f_mtp.c @@ -410,15 +410,6 @@ static int mtp_create_bulk_endpoints(struct mtp_dev *dev, ep->driver_data = dev; /* claim the endpoint */ dev->ep_out = ep; - ep = usb_ep_autoconfig(cdev->gadget, out_desc); - if (!ep) { - DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); - return -ENODEV; - } - DBG(cdev, "usb_ep_autoconfig for mtp ep_out got %s\n", ep->name); - ep->driver_data = dev; /* claim the endpoint */ - dev->ep_out = ep; - ep = usb_ep_autoconfig(cdev->gadget, intr_desc); if (!ep) { DBG(cdev, "usb_ep_autoconfig for ep_intr failed\n"); -- GitLab From 2453dad5790c059b660792402f58ad8a5a9d1cb3 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 7 Nov 2013 13:08:15 -0800 Subject: [PATCH 0706/1442] ANDROID: usb: gadget: f_mtp: move userspace interface to uapi Move the most of linux/usb/f_mtp.h header to uapi. Move the only remaining structure definition into f_mtp.c, the only place that uses it. Change-Id: I952c1a9dc15c36bf295a0eb4d74b6b1ad912ed03 Signed-off-by: Colin Cross --- drivers/usb/gadget/f_mtp.c | 11 ++++++ include/linux/usb/f_mtp.h | 54 +----------------------------- include/uapi/linux/usb/f_mtp.h | 61 ++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 53 deletions(-) create mode 100644 include/uapi/linux/usb/f_mtp.h diff --git a/drivers/usb/gadget/f_mtp.c b/drivers/usb/gadget/f_mtp.c index 12fb818ab147..960d64fbd40b 100644 --- a/drivers/usb/gadget/f_mtp.c +++ b/drivers/usb/gadget/f_mtp.c @@ -269,6 +269,17 @@ struct mtp_device_status { __le16 wCode; }; +struct mtp_data_header { + /* length of packet, including this header */ + __le32 length; + /* container type (2 for data packet) */ + __le16 type; + /* MTP command code */ + __le16 command; + /* MTP transaction ID */ + __le32 transaction_id; +}; + /* temporary variable used between mtp_open() and mtp_gadget_bind() */ static struct mtp_dev *_mtp_dev; diff --git a/include/linux/usb/f_mtp.h b/include/linux/usb/f_mtp.h index 72a432e2fcdd..4e8417791bea 100644 --- a/include/linux/usb/f_mtp.h +++ b/include/linux/usb/f_mtp.h @@ -18,58 +18,6 @@ #ifndef __LINUX_USB_F_MTP_H #define __LINUX_USB_F_MTP_H -#include - -#ifdef __KERNEL__ - -struct mtp_data_header { - /* length of packet, including this header */ - uint32_t length; - /* container type (2 for data packet) */ - uint16_t type; - /* MTP command code */ - uint16_t command; - /* MTP transaction ID */ - uint32_t transaction_id; -}; - -#endif /* __KERNEL__ */ - -struct mtp_file_range { - /* file descriptor for file to transfer */ - int fd; - /* offset in file for start of transfer */ - loff_t offset; - /* number of bytes to transfer */ - int64_t length; - /* MTP command ID for data header, - * used only for MTP_SEND_FILE_WITH_HEADER - */ - uint16_t command; - /* MTP transaction ID for data header, - * used only for MTP_SEND_FILE_WITH_HEADER - */ - uint32_t transaction_id; -}; - -struct mtp_event { - /* size of the event */ - size_t length; - /* event data to send */ - void *data; -}; - -/* Sends the specified file range to the host */ -#define MTP_SEND_FILE _IOW('M', 0, struct mtp_file_range) -/* Receives data from the host and writes it to a file. - * The file is created if it does not exist. - */ -#define MTP_RECEIVE_FILE _IOW('M', 1, struct mtp_file_range) -/* Sends an event to the host via the interrupt endpoint */ -#define MTP_SEND_EVENT _IOW('M', 3, struct mtp_event) -/* Sends the specified file range to the host, - * with a 12 byte MTP data packet header at the beginning. - */ -#define MTP_SEND_FILE_WITH_HEADER _IOW('M', 4, struct mtp_file_range) +#include #endif /* __LINUX_USB_F_MTP_H */ diff --git a/include/uapi/linux/usb/f_mtp.h b/include/uapi/linux/usb/f_mtp.h new file mode 100644 index 000000000000..503291855abd --- /dev/null +++ b/include/uapi/linux/usb/f_mtp.h @@ -0,0 +1,61 @@ +/* + * Gadget Function Driver for MTP + * + * Copyright (C) 2010 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _UAPI_LINUX_USB_F_MTP_H +#define _UAPI_LINUX_USB_F_MTP_H + +#include +#include + +struct mtp_file_range { + /* file descriptor for file to transfer */ + int fd; + /* offset in file for start of transfer */ + loff_t offset; + /* number of bytes to transfer */ + int64_t length; + /* MTP command ID for data header, + * used only for MTP_SEND_FILE_WITH_HEADER + */ + uint16_t command; + /* MTP transaction ID for data header, + * used only for MTP_SEND_FILE_WITH_HEADER + */ + uint32_t transaction_id; +}; + +struct mtp_event { + /* size of the event */ + size_t length; + /* event data to send */ + void *data; +}; + +/* Sends the specified file range to the host */ +#define MTP_SEND_FILE _IOW('M', 0, struct mtp_file_range) +/* Receives data from the host and writes it to a file. + * The file is created if it does not exist. + */ +#define MTP_RECEIVE_FILE _IOW('M', 1, struct mtp_file_range) +/* Sends an event to the host via the interrupt endpoint */ +#define MTP_SEND_EVENT _IOW('M', 3, struct mtp_event) +/* Sends the specified file range to the host, + * with a 12 byte MTP data packet header at the beginning. + */ +#define MTP_SEND_FILE_WITH_HEADER _IOW('M', 4, struct mtp_file_range) + +#endif /* _UAPI_LINUX_USB_F_MTP_H */ -- GitLab From e73ec5091263b173e19be7697d6c4111bcb38750 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 7 Nov 2013 13:08:39 -0800 Subject: [PATCH 0707/1442] ANDROID: usb: gadget: f_accessory: move userspace interface to uapi Move the entire contents of linux/usb/f_accessory.h header to uapi, it only contains a userspace interface. Change-Id: Ieb5547da449588ae554988a201c0e6b4e3afc531 Signed-off-by: Colin Cross --- include/linux/usb/f_accessory.h | 125 +---------------------- include/uapi/linux/usb/f_accessory.h | 146 +++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 124 deletions(-) create mode 100644 include/uapi/linux/usb/f_accessory.h diff --git a/include/linux/usb/f_accessory.h b/include/linux/usb/f_accessory.h index 61ebe0aabc5b..ebe3c4d59309 100644 --- a/include/linux/usb/f_accessory.h +++ b/include/linux/usb/f_accessory.h @@ -18,129 +18,6 @@ #ifndef __LINUX_USB_F_ACCESSORY_H #define __LINUX_USB_F_ACCESSORY_H -/* Use Google Vendor ID when in accessory mode */ -#define USB_ACCESSORY_VENDOR_ID 0x18D1 - - -/* Product ID to use when in accessory mode */ -#define USB_ACCESSORY_PRODUCT_ID 0x2D00 - -/* Product ID to use when in accessory mode and adb is enabled */ -#define USB_ACCESSORY_ADB_PRODUCT_ID 0x2D01 - -/* Indexes for strings sent by the host via ACCESSORY_SEND_STRING */ -#define ACCESSORY_STRING_MANUFACTURER 0 -#define ACCESSORY_STRING_MODEL 1 -#define ACCESSORY_STRING_DESCRIPTION 2 -#define ACCESSORY_STRING_VERSION 3 -#define ACCESSORY_STRING_URI 4 -#define ACCESSORY_STRING_SERIAL 5 - -/* Control request for retrieving device's protocol version - * - * requestType: USB_DIR_IN | USB_TYPE_VENDOR - * request: ACCESSORY_GET_PROTOCOL - * value: 0 - * index: 0 - * data version number (16 bits little endian) - * 1 for original accessory support - * 2 adds HID and device to host audio support - */ -#define ACCESSORY_GET_PROTOCOL 51 - -/* Control request for host to send a string to the device - * - * requestType: USB_DIR_OUT | USB_TYPE_VENDOR - * request: ACCESSORY_SEND_STRING - * value: 0 - * index: string ID - * data zero terminated UTF8 string - * - * The device can later retrieve these strings via the - * ACCESSORY_GET_STRING_* ioctls - */ -#define ACCESSORY_SEND_STRING 52 - -/* Control request for starting device in accessory mode. - * The host sends this after setting all its strings to the device. - * - * requestType: USB_DIR_OUT | USB_TYPE_VENDOR - * request: ACCESSORY_START - * value: 0 - * index: 0 - * data none - */ -#define ACCESSORY_START 53 - -/* Control request for registering a HID device. - * Upon registering, a unique ID is sent by the accessory in the - * value parameter. This ID will be used for future commands for - * the device - * - * requestType: USB_DIR_OUT | USB_TYPE_VENDOR - * request: ACCESSORY_REGISTER_HID_DEVICE - * value: Accessory assigned ID for the HID device - * index: total length of the HID report descriptor - * data none - */ -#define ACCESSORY_REGISTER_HID 54 - -/* Control request for unregistering a HID device. - * - * requestType: USB_DIR_OUT | USB_TYPE_VENDOR - * request: ACCESSORY_REGISTER_HID - * value: Accessory assigned ID for the HID device - * index: 0 - * data none - */ -#define ACCESSORY_UNREGISTER_HID 55 - -/* Control request for sending the HID report descriptor. - * If the HID descriptor is longer than the endpoint zero max packet size, - * the descriptor will be sent in multiple ACCESSORY_SET_HID_REPORT_DESC - * commands. The data for the descriptor must be sent sequentially - * if multiple packets are needed. - * - * requestType: USB_DIR_OUT | USB_TYPE_VENDOR - * request: ACCESSORY_SET_HID_REPORT_DESC - * value: Accessory assigned ID for the HID device - * index: offset of data in descriptor - * (needed when HID descriptor is too big for one packet) - * data the HID report descriptor - */ -#define ACCESSORY_SET_HID_REPORT_DESC 56 - -/* Control request for sending HID events. - * - * requestType: USB_DIR_OUT | USB_TYPE_VENDOR - * request: ACCESSORY_SEND_HID_EVENT - * value: Accessory assigned ID for the HID device - * index: 0 - * data the HID report for the event - */ -#define ACCESSORY_SEND_HID_EVENT 57 - -/* Control request for setting the audio mode. - * - * requestType: USB_DIR_OUT | USB_TYPE_VENDOR - * request: ACCESSORY_SET_AUDIO_MODE - * value: 0 - no audio - * 1 - device to host, 44100 16-bit stereo PCM - * index: 0 - * data none - */ -#define ACCESSORY_SET_AUDIO_MODE 58 - -/* ioctls for retrieving strings set by the host */ -#define ACCESSORY_GET_STRING_MANUFACTURER _IOW('M', 1, char[256]) -#define ACCESSORY_GET_STRING_MODEL _IOW('M', 2, char[256]) -#define ACCESSORY_GET_STRING_DESCRIPTION _IOW('M', 3, char[256]) -#define ACCESSORY_GET_STRING_VERSION _IOW('M', 4, char[256]) -#define ACCESSORY_GET_STRING_URI _IOW('M', 5, char[256]) -#define ACCESSORY_GET_STRING_SERIAL _IOW('M', 6, char[256]) -/* returns 1 if there is a start request pending */ -#define ACCESSORY_IS_START_REQUESTED _IO('M', 7) -/* returns audio mode (set via the ACCESSORY_SET_AUDIO_MODE control request) */ -#define ACCESSORY_GET_AUDIO_MODE _IO('M', 8) +#include #endif /* __LINUX_USB_F_ACCESSORY_H */ diff --git a/include/uapi/linux/usb/f_accessory.h b/include/uapi/linux/usb/f_accessory.h new file mode 100644 index 000000000000..0baeb7d0d74c --- /dev/null +++ b/include/uapi/linux/usb/f_accessory.h @@ -0,0 +1,146 @@ +/* + * Gadget Function Driver for Android USB accessories + * + * Copyright (C) 2011 Google, Inc. + * Author: Mike Lockwood + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _UAPI_LINUX_USB_F_ACCESSORY_H +#define _UAPI_LINUX_USB_F_ACCESSORY_H + +/* Use Google Vendor ID when in accessory mode */ +#define USB_ACCESSORY_VENDOR_ID 0x18D1 + + +/* Product ID to use when in accessory mode */ +#define USB_ACCESSORY_PRODUCT_ID 0x2D00 + +/* Product ID to use when in accessory mode and adb is enabled */ +#define USB_ACCESSORY_ADB_PRODUCT_ID 0x2D01 + +/* Indexes for strings sent by the host via ACCESSORY_SEND_STRING */ +#define ACCESSORY_STRING_MANUFACTURER 0 +#define ACCESSORY_STRING_MODEL 1 +#define ACCESSORY_STRING_DESCRIPTION 2 +#define ACCESSORY_STRING_VERSION 3 +#define ACCESSORY_STRING_URI 4 +#define ACCESSORY_STRING_SERIAL 5 + +/* Control request for retrieving device's protocol version + * + * requestType: USB_DIR_IN | USB_TYPE_VENDOR + * request: ACCESSORY_GET_PROTOCOL + * value: 0 + * index: 0 + * data version number (16 bits little endian) + * 1 for original accessory support + * 2 adds HID and device to host audio support + */ +#define ACCESSORY_GET_PROTOCOL 51 + +/* Control request for host to send a string to the device + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SEND_STRING + * value: 0 + * index: string ID + * data zero terminated UTF8 string + * + * The device can later retrieve these strings via the + * ACCESSORY_GET_STRING_* ioctls + */ +#define ACCESSORY_SEND_STRING 52 + +/* Control request for starting device in accessory mode. + * The host sends this after setting all its strings to the device. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_START + * value: 0 + * index: 0 + * data none + */ +#define ACCESSORY_START 53 + +/* Control request for registering a HID device. + * Upon registering, a unique ID is sent by the accessory in the + * value parameter. This ID will be used for future commands for + * the device + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_REGISTER_HID_DEVICE + * value: Accessory assigned ID for the HID device + * index: total length of the HID report descriptor + * data none + */ +#define ACCESSORY_REGISTER_HID 54 + +/* Control request for unregistering a HID device. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_REGISTER_HID + * value: Accessory assigned ID for the HID device + * index: 0 + * data none + */ +#define ACCESSORY_UNREGISTER_HID 55 + +/* Control request for sending the HID report descriptor. + * If the HID descriptor is longer than the endpoint zero max packet size, + * the descriptor will be sent in multiple ACCESSORY_SET_HID_REPORT_DESC + * commands. The data for the descriptor must be sent sequentially + * if multiple packets are needed. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SET_HID_REPORT_DESC + * value: Accessory assigned ID for the HID device + * index: offset of data in descriptor + * (needed when HID descriptor is too big for one packet) + * data the HID report descriptor + */ +#define ACCESSORY_SET_HID_REPORT_DESC 56 + +/* Control request for sending HID events. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SEND_HID_EVENT + * value: Accessory assigned ID for the HID device + * index: 0 + * data the HID report for the event + */ +#define ACCESSORY_SEND_HID_EVENT 57 + +/* Control request for setting the audio mode. + * + * requestType: USB_DIR_OUT | USB_TYPE_VENDOR + * request: ACCESSORY_SET_AUDIO_MODE + * value: 0 - no audio + * 1 - device to host, 44100 16-bit stereo PCM + * index: 0 + * data none + */ +#define ACCESSORY_SET_AUDIO_MODE 58 + +/* ioctls for retrieving strings set by the host */ +#define ACCESSORY_GET_STRING_MANUFACTURER _IOW('M', 1, char[256]) +#define ACCESSORY_GET_STRING_MODEL _IOW('M', 2, char[256]) +#define ACCESSORY_GET_STRING_DESCRIPTION _IOW('M', 3, char[256]) +#define ACCESSORY_GET_STRING_VERSION _IOW('M', 4, char[256]) +#define ACCESSORY_GET_STRING_URI _IOW('M', 5, char[256]) +#define ACCESSORY_GET_STRING_SERIAL _IOW('M', 6, char[256]) +/* returns 1 if there is a start request pending */ +#define ACCESSORY_IS_START_REQUESTED _IO('M', 7) +/* returns audio mode (set via the ACCESSORY_SET_AUDIO_MODE control request) */ +#define ACCESSORY_GET_AUDIO_MODE _IO('M', 8) + +#endif /* _UAPI_LINUX_USB_F_ACCESSORY_H */ -- GitLab From 1b07ec751563aa439f59669090bbc08c846d0830 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 24 Feb 2014 10:19:13 -0800 Subject: [PATCH 0708/1442] ANDROID: drivers: usb: gadget: 64-bit related type fixes Change-Id: I2f9b12e1e0cdfe64ffe20db78d319a6221821184 Signed-off-by: Greg Hackmann --- drivers/usb/gadget/f_accessory.c | 16 +++++++++------- drivers/usb/gadget/f_mtp.c | 16 +++++++++------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 092964c2b506..73c6b2072d73 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -552,10 +552,11 @@ static ssize_t acc_read(struct file *fp, char __user *buf, { struct acc_dev *dev = fp->private_data; struct usb_request *req; - int r = count, xfer; + ssize_t r = count; + unsigned xfer; int ret = 0; - pr_debug("acc_read(%d)\n", count); + pr_debug("acc_read(%zu)\n", count); if (dev->disconnected) return -ENODEV; @@ -596,7 +597,7 @@ static ssize_t acc_read(struct file *fp, char __user *buf, if (req->actual == 0) goto requeue_req; - pr_debug("rx %p %d\n", req, req->actual); + pr_debug("rx %p %u\n", req, req->actual); xfer = (req->actual < count) ? req->actual : count; r = xfer; if (copy_to_user(buf, req->buf, xfer)) @@ -605,7 +606,7 @@ static ssize_t acc_read(struct file *fp, char __user *buf, r = -EIO; done: - pr_debug("acc_read returning %d\n", r); + pr_debug("acc_read returning %zd\n", r); return r; } @@ -614,10 +615,11 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, { struct acc_dev *dev = fp->private_data; struct usb_request *req = 0; - int r = count, xfer; + ssize_t r = count; + unsigned xfer; int ret; - pr_debug("acc_write(%d)\n", count); + pr_debug("acc_write(%zu)\n", count); if (!dev->online || dev->disconnected) return -ENODEV; @@ -665,7 +667,7 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, if (req) req_put(dev, &dev->tx_idle, req); - pr_debug("acc_write returning %d\n", r); + pr_debug("acc_write returning %zd\n", r); return r; } diff --git a/drivers/usb/gadget/f_mtp.c b/drivers/usb/gadget/f_mtp.c index 960d64fbd40b..620aeaaf2d72 100644 --- a/drivers/usb/gadget/f_mtp.c +++ b/drivers/usb/gadget/f_mtp.c @@ -466,10 +466,11 @@ static ssize_t mtp_read(struct file *fp, char __user *buf, struct mtp_dev *dev = fp->private_data; struct usb_composite_dev *cdev = dev->cdev; struct usb_request *req; - int r = count, xfer; + ssize_t r = count; + unsigned xfer; int ret = 0; - DBG(cdev, "mtp_read(%d)\n", count); + DBG(cdev, "mtp_read(%zu)\n", count); if (count > MTP_BULK_BUFFER_SIZE) return -EINVAL; @@ -533,7 +534,7 @@ static ssize_t mtp_read(struct file *fp, char __user *buf, dev->state = STATE_READY; spin_unlock_irq(&dev->lock); - DBG(cdev, "mtp_read returning %d\n", r); + DBG(cdev, "mtp_read returning %zd\n", r); return r; } @@ -543,11 +544,12 @@ static ssize_t mtp_write(struct file *fp, const char __user *buf, struct mtp_dev *dev = fp->private_data; struct usb_composite_dev *cdev = dev->cdev; struct usb_request *req = 0; - int r = count, xfer; + ssize_t r = count; + unsigned xfer; int sendZLP = 0; int ret; - DBG(cdev, "mtp_write(%d)\n", count); + DBG(cdev, "mtp_write(%zu)\n", count); spin_lock_irq(&dev->lock); if (dev->state == STATE_CANCELED) { @@ -624,7 +626,7 @@ static ssize_t mtp_write(struct file *fp, const char __user *buf, dev->state = STATE_READY; spin_unlock_irq(&dev->lock); - DBG(cdev, "mtp_write returning %d\n", r); + DBG(cdev, "mtp_write returning %zd\n", r); return r; } @@ -823,7 +825,7 @@ static int mtp_send_event(struct mtp_dev *dev, struct mtp_event *event) int ret; int length = event->length; - DBG(dev->cdev, "mtp_send_event(%d)\n", event->length); + DBG(dev->cdev, "mtp_send_event(%zu)\n", event->length); if (length < 0 || length > INTR_BUFFER_SIZE) return -EINVAL; -- GitLab From 64d81a260c5b3a83d4c437fa6bd591cc6fb82133 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Mon, 23 Jun 2014 19:07:44 +0800 Subject: [PATCH 0709/1442] ANDROID: usb: gadget: f_accessory: Enabled Zero Length Packet (ZLP) for acc_write Accessory connected to Android Device requires Zero Length Packet (ZLP) to be written when data transferred out from the Android device are multiples of wMaxPacketSize (64bytes (Full-Speed) / 512bytes (High-Speed)) to end the transfer. Change-Id: Ib2c2c0ab98ef9afa10e74a720142deca5c0ed476 Signed-off-by: Anson Jacob --- drivers/usb/gadget/f_accessory.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 73c6b2072d73..f8490c034107 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -640,10 +640,17 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, break; } - if (count > BULK_BUFFER_SIZE) + if (count > BULK_BUFFER_SIZE) { xfer = BULK_BUFFER_SIZE; - else + /* ZLP, They will be more TX requests so not yet. */ + req->zero = 0; + } else { xfer = count; + /* If the data length is a multple of the + * maxpacket size then send a zero length packet(ZLP). + */ + req->zero = ((xfer % dev->ep_in->maxpacket) == 0); + } if (copy_from_user(req->buf, buf, xfer)) { r = -EFAULT; break; -- GitLab From 2c4e8fed0d1031b92ec8d9205867a38730726450 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Tue, 1 Jul 2014 18:17:20 +0800 Subject: [PATCH 0710/1442] ANDROID: usb: gadget: f_audio_source: change max ISO packet size Re-applying from https://gitorious.org/shr/linux/commit/eb4c9d2db894c3492c0a848581bd4f6790f93d5f Most USB-AUDIO devices are limited to 256 byte for max iso buffer size. If a IN_EP_MAX_PACKET_SIZE is bigger than a USB-AUDIO device's max iso buffer size, it will cause noise. This patch will prevent this case as possibe by reducing packet size. When using 44.1khz, 2ch, 16bit audio data, if max packet size is bigger than 176 bytes, it's no problem. Credits to: Iliyan Malchev Change-Id: Ic2a1c19ea65d5fb42bf12926b51b255b465d7215 Signed-off-by: Anson Jacob --- drivers/usb/gadget/f_audio_source.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c index 56dcf217cfe5..65760c42d422 100644 --- a/drivers/usb/gadget/f_audio_source.c +++ b/drivers/usb/gadget/f_audio_source.c @@ -24,7 +24,7 @@ #define SAMPLE_RATE 44100 #define FRAMES_PER_MSEC (SAMPLE_RATE / 1000) -#define IN_EP_MAX_PACKET_SIZE 384 +#define IN_EP_MAX_PACKET_SIZE 256 /* Number of requests to allocate */ #define IN_EP_REQ_COUNT 4 -- GitLab From be102d9252827d9311f9f6c3e5ce794436f444c9 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Mon, 23 Jun 2014 19:14:01 +0800 Subject: [PATCH 0711/1442] ANDROID: usb: gadget: f_audio_source: Fixed USB Audio Class Interface Descriptor Fixed Android Issue #56549. When both Vendor Class and Audio Class are activated for AOA 2.0, the baInterfaceNr of the AudioControl Interface Descriptor points to wrong interface numbers. They should be pointing to Audio Control Device and Audio Streaming interfaces. Replaced baInterfaceNr with the correct value. Change-Id: Iaa083f3d97c1f0fc9481bf87852b2b51278a6351 Signed-off-by: Anson Jacob --- drivers/usb/gadget/f_audio_source.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c index 65760c42d422..21ced13c83d8 100644 --- a/drivers/usb/gadget/f_audio_source.c +++ b/drivers/usb/gadget/f_audio_source.c @@ -580,12 +580,18 @@ audio_bind(struct usb_configuration *c, struct usb_function *f) goto fail; ac_interface_desc.bInterfaceNumber = status; + /* AUDIO_AC_INTERFACE */ + ac_header_desc.baInterfaceNr[0] = status; + status = usb_interface_id(c, f); if (status < 0) goto fail; as_interface_alt_0_desc.bInterfaceNumber = status; as_interface_alt_1_desc.bInterfaceNumber = status; + /* AUDIO_AS_INTERFACE */ + ac_header_desc.baInterfaceNr[1] = status; + status = -ENODEV; /* allocate our endpoint */ -- GitLab From 40bf0662fe3f794fef0a44456337cfb1b1eb45b5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Jan 2017 10:56:02 +0100 Subject: [PATCH 0712/1442] Linux 4.9.5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9175706bfe7f..2a8af8af7b27 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 4 +SUBLEVEL = 5 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From b7c55155983403fe654324e4828e79bd7aab8d76 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 3 Nov 2016 16:44:11 +0200 Subject: [PATCH 0713/1442] IB/core: Release allocated memory in cache setup failure commit aa6aae38f7fb2c030f326a6dd10b58fff1851dfa upstream. The failure in ib_cache_setup_one function during ib_register_device will leave leaked allocated memory. Fixes: 03db3a2d81e6 ("IB/core: Add RoCE GID table management") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cache.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 1a2984c28b95..ae04826e82fc 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -770,12 +770,8 @@ static int _gid_table_setup_one(struct ib_device *ib_dev) int err = 0; table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); - - if (!table) { - pr_warn("failed to allocate ib gid cache for %s\n", - ib_dev->name); + if (!table) return -ENOMEM; - } for (port = 0; port < ib_dev->phys_port_cnt; port++) { u8 rdma_port = port + rdma_start_port(ib_dev); @@ -1170,14 +1166,13 @@ int ib_cache_setup_one(struct ib_device *device) GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.lmc_cache) { - pr_warn("Couldn't allocate cache for %s\n", device->name); - return -ENOMEM; + err = -ENOMEM; + goto free; } err = gid_table_setup_one(device); if (err) - /* Allocated memory will be cleaned in the release function */ - return err; + goto free; for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) ib_cache_update(device, p + rdma_start_port(device)); @@ -1192,6 +1187,9 @@ int ib_cache_setup_one(struct ib_device *device) err: gid_table_cleanup_one(device); +free: + kfree(device->cache.pkey_cache); + kfree(device->cache.lmc_cache); return err; } -- GitLab From 25c72b518734ec5d4dfb7ad90d785ff8efc04eb7 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:16 +0200 Subject: [PATCH 0714/1442] IB/rxe: Increase max number of completions to 32k commit d680ebed91e0b45c43ae03a880a0b43211096161 upstream. Increase limit of max CQE from 8K to 32K to allow demanding applications to work over SoftRoCE with same configuration as most RoCEv2 HW vendors have. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f459c43a77c8..13ed2cc6eaa2 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -82,7 +82,7 @@ enum rxe_device_param { RXE_MAX_SGE = 32, RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, - RXE_MAX_LOG_CQE = 13, + RXE_MAX_LOG_CQE = 15, RXE_MAX_MR = 2 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, -- GitLab From 45f3a7eb2ca05a8cd0e56889fba96b548077f7a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Sep 2016 13:57:26 +0200 Subject: [PATCH 0715/1442] IB/rxe: avoid putting a large struct rxe_qp on stack commit a0fa72683e78979ef1123d679b1c40ae28bd9096 upstream. A race condition fix added an rxe_qp structure to the stack in order to be able to perform rollback in rxe_requester(), but the structure is large enough to trigger the warning for possible stack overflow: drivers/infiniband/sw/rxe/rxe_req.c: In function 'rxe_requester': drivers/infiniband/sw/rxe/rxe_req.c:757:1: error: the frame size of 2064 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] This changes the rollback function to only save the psn inside the qp, which is the only field we access in the rollback_qp anyway. Fixes: 3050b9985024 ("IB/rxe: Fix race condition between requester and completer") Signed-off-by: Arnd Bergmann Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_req.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 22bd9630dcd9..9f46be52335e 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -548,23 +548,23 @@ static void update_wqe_psn(struct rxe_qp *qp, static void save_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 *rollback_psn) { rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; rollback_wqe->last_psn = wqe->last_psn; - rollback_qp->req.psn = qp->req.psn; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 rollback_psn) { wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_qp->req.psn; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -593,8 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - struct rxe_qp rollback_qp; struct rxe_send_wqe rollback_wqe; + u32 rollback_psn; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -719,7 +719,7 @@ int rxe_requester(void *arg) * rxe_xmit_packet(). * Otherwise, completer might initiate an unjustified retry flow. */ - save_state(wqe, qp, &rollback_wqe, &rollback_qp); + save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); @@ -727,7 +727,7 @@ int rxe_requester(void *arg) qp->need_req_skb = 1; kfree_skb(skb); - rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); -- GitLab From 4db097fe49ea9dc608e614fbf46bc7463be0eb76 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 27 Nov 2016 15:18:21 +0200 Subject: [PATCH 0716/1442] IB/mlx5: Avoid system crash when enabling many VFs commit afd02cd3a9b6c04b41d946b5d7f6e17b3fc30c6b upstream. When enabling many VFs, the total amount of DMA mappings increase significantly. This causes DMA allocations to take a lot of time since they are serialized in the kernel. As a result the driver enters into fatal condition due to timeout and the system hangs. To recover from this we disable MR cache for VFs. PFs will still have a full cache and VFs cache can be manipulated as usual after driver load. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4e9012463c37..501af9eab6ec 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -628,7 +628,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->order = i + 2; ent->dev = dev; - if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) + if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + (mlx5_core_is_pf(dev->mdev))) limit = dev->mdev->profile->mr_cache[i].limit; else limit = 0; -- GitLab From dae9f4f144119229e4218f92ec0876549460ed45 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:45 +0300 Subject: [PATCH 0717/1442] IB/mlx5: Fix reported max SGE calculation commit 288c01b746aab484651391ca6d64b585d3eb5ec6 upstream. Add the 512 bytes limit of RDMA READ and the size of remote address to the max SGE calculation. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 8 +++++--- drivers/infiniband/hw/mlx5/qp.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 32b09f059c84..4cab29ea394c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -496,6 +496,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; + int max_sq_desc; int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); @@ -618,9 +619,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); + max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d1e921816bfe..aee3942ec68d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -351,6 +351,29 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } +static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size) +{ + int max_sge; + + if (attr->qp_type == IB_QPT_RC) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else if (attr->qp_type == IB_QPT_XRC_INI) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_xrc_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else + max_sge = (wqe_size - sq_overhead(attr)) / + sizeof(struct mlx5_wqe_data_seg); + + return min_t(int, max_sge, wqe_size - sq_overhead(attr) / + sizeof(struct mlx5_wqe_data_seg)); +} + static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { @@ -387,7 +410,11 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); - qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_gs = get_send_sge(attr, wqe_size); + if (qp->sq.max_gs < attr->cap.max_send_sge) + return -ENOMEM; + + attr->cap.max_send_sge = qp->sq.max_gs; qp->sq.max_post = wq_size / wqe_size; attr->cap.max_send_wr = qp->sq.max_post; -- GitLab From 7c8441c89043d9914a441e5589bb76148f1a93da Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Nov 2016 15:18:20 +0200 Subject: [PATCH 0718/1442] IB/mlx5: Assign SRQ type earlier commit c73b7911de97fad3ab9032a110af48d6ab2da48f upstream. Move the SRQ type assignment to be before actually using it in create_srq_user() and in create_srq_kernel() functions. Fixes: af1ba291c5e4 ('{net, IB}/mlx5: Refactor internal SRQ API') Signed-off-by: Maor Gottlieb Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3857dbd9c956..729b0696626e 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -282,6 +282,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); + in.type = init_attr->srq_type; if (pd->uobject) err = create_srq_user(pd, srq, &in, udata, buf_size); @@ -294,7 +295,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - in.type = init_attr->srq_type; in.log_size = ilog2(srq->msrq.max); in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) -- GitLab From 0d64860aa6389e4cd23da3ef1eb648fbeded1060 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:43 +0300 Subject: [PATCH 0719/1442] IB/mlx5: Wait for all async command completions to complete commit acbda523884dcf45613bf6818d8ead5180df35c2 upstream. Wait before continuing unload till all pending mkey async creation requests are done. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/mr.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 501af9eab6ec..be2d02b6a6aa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -647,6 +647,33 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } +static void wait_for_async_commands(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int total = 0; + int i; + int j; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + for (j = 0 ; j < 1000; j++) { + if (!ent->pending) + break; + msleep(50); + } + } + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + total += ent->pending; + } + + if (total) + mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); + else + mlx5_ib_warn(dev, "done with all pending requests\n"); +} + int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -660,6 +687,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); + wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; -- GitLab From 970f6806c2b17054ab47cfa0751d8e4150a92c1f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 10 Nov 2016 11:30:53 +0200 Subject: [PATCH 0720/1442] IB/mlx4: Set traffic class in AH commit af4295c117b82a521b05d0daf39ce879d26e6cb1 upstream. Set traffic class within sl_tclass_flowlabel when create iboe AH. Without this the TOS value will be empty when running VLAN tagged traffic, because the TOS value is taken from the traffic class in the address handle attributes. Fixes: 9106c4106974 ('IB/mlx4: Fix SL to 802.1Q priority-bits mapping for IBoE') Signed-off-by: Maor Gottlieb Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/ah.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index b9bf0759f10a..8dfc76f8cbb4 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -114,7 +114,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) --ah->av.eth.stat_rate; } - + ah->av.eth.sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); /* * HW requires multicast LID so we just choose one. */ @@ -122,7 +124,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr ah->av.ib.dlid = cpu_to_be16(0xc000); memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); - ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); + ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29); return &ah->ibah; } -- GitLab From 618d14b01a45cb0fbfe5ad1177699e4a49dec237 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 27 Nov 2016 15:18:19 +0200 Subject: [PATCH 0721/1442] IB/mlx4: Fix out-of-range array index in destroy qp flow commit c482af646d0809a8d5e1b7f4398cce3592589b98 upstream. For non-special QPs, the port value becomes non-zero only at the RESET-to-INIT transition. If the QP has not undergone that transition, its port number value is still zero. If such a QP is destroyed before being moved out of the RESET state, subtracting one from the qp port number results in a negative value. Using that negative value as an index into the qp1_proxy array results in an out-of-bounds array reference. Fix this by testing that the QP type is one that uses qp1_proxy before using the port number. For special QPs of all types, the port number is specified at QP creation time. Fixes: 9433c188915c ("IB/mlx4: Invoke UPDATE_QP for proxy QP1 on MAC changes") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 570bc866b1d6..69739473a39c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1280,7 +1280,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); - if (dev->qp1_proxy[mqp->port - 1] == mqp) { + if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI && + dev->qp1_proxy[mqp->port - 1] == mqp) { mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); dev->qp1_proxy[mqp->port - 1] = NULL; mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); -- GitLab From 1039b7f5c7ef027279a2f554e31bcb529163d335 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 10 Nov 2016 11:30:57 +0200 Subject: [PATCH 0722/1442] IB/mlx4: Handle well-known-gid in mad_demux processing commit befcabcd530e4ffb6f016638f693b7d94986d2ba upstream. If OpenSM runs over a ConnectX-3, and there are ConnectX-4 or Connect-IB VFs active on the network, the OpenSM will receive QP1 packets containing a GRH where the destination GID is the "Well-Known GID" -- which is not a GID in the HCA Port's GID Table. This GID must be tested-for separately -- and packets which contain this destination GID should be routed to slave 0 (the PF). Fixes: 37bfc7c1e83f ('IB/mlx4: SR-IOV multiplex and demultiplex MADs') Signed-off-by: Jack Morgenstein Signed-off-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/mad.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1672907ff219..18d309e40f1b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -702,10 +702,18 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, /* If a grh is present, we demux according to it */ if (wc->wc_flags & IB_WC_GRH) { - slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id); - if (slave < 0) { - mlx4_ib_warn(ibdev, "failed matching grh\n"); - return -ENOENT; + if (grh->dgid.global.interface_id == + cpu_to_be64(IB_SA_WELL_KNOWN_GUID) && + grh->dgid.global.subnet_prefix == cpu_to_be64( + atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) { + slave = 0; + } else { + slave = mlx4_ib_find_real_gid(ibdev, port, + grh->dgid.global.interface_id); + if (slave < 0) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } } } /* Class-specific handling */ -- GitLab From 8f95735412ad8d9327cb890ac11a89a4b911c41b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 10 Nov 2016 11:30:59 +0200 Subject: [PATCH 0723/1442] IB/mlx4: Fix port query for 56Gb Ethernet links commit 6fa26208206c406fa529cd73f7ae6bf4181e270b upstream. Report the correct speed in the port attributes when using a 56Gbps ethernet link. Without this change the field is incorrectly set to 10. Fixes: a9c766bb75ee ('IB/mlx4: Fix info returned when querying IBoE ports') Fixes: 2e96691c31ec ('IB: Use central enum for speed instead of hard-coded values') Signed-off-by: Saeed Mahameed Signed-off-by: Yishai Hadas Signed-off-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b597e8227591..f984ebcc216f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -697,9 +697,11 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, if (err) goto out; - props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? - IB_WIDTH_4X : IB_WIDTH_1X; - props->active_speed = IB_SPEED_QDR; + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) || + (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_SPEED_FDR : IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; -- GitLab From 661600ff77f142b26338eb795e5694a17a6df699 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 10 Nov 2016 11:31:00 +0200 Subject: [PATCH 0724/1442] IB/mlx4: When no DMFS for IPoIB, don't allow NET_IF QPs commit 1f22e454df2eb99ba6b7ace3f594f6805cdf5cbc upstream. According to the firmware spec, FLOW_STEERING_IB_UC_QP_RANGE command is supported only if dmfs_ipoib bit is set. If it isn't set we want to ensure allocating NET_IF QPs fail. We do so by filling out the allocation bitmap. By thus, the NET_IF QPs allocating function won't find any free QP and will fail. Fixes: c1c98501121e ('IB/mlx4: Add support for steerable IB UD QPs') Signed-off-by: Eran Ben Elisha Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/main.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f984ebcc216f..46ad99595fd2 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2822,14 +2822,19 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_steer_qp_release; } - bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); - - err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( - dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_base + - ibdev->steer_qpn_count - 1); - if (err) - goto err_steer_free_bitmap; + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) { + bitmap_zero(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( + dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_base + + ibdev->steer_qpn_count - 1); + if (err) + goto err_steer_free_bitmap; + } else { + bitmap_fill(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + } } for (j = 1; j <= ibdev->dev->caps.num_ports; j++) -- GitLab From 656a7d6575336348000bcf42e9827b3b4ff95102 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 10 Nov 2016 11:31:01 +0200 Subject: [PATCH 0725/1442] IB/mlx4: Check if GRH is available before using it commit bf08e884bfd5be068fd2ccf2bc450f085d8dd853 upstream. Before reading GRH attributes, need to make sure AH contains GRH, and in addition, initialize GID type. Fixes: dbf727de7440 ('IB/core: Use GID table in AH creation and dmac resolution') Signed-off-by: Eran Ben Elisha Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx4/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 69739473a39c..c22454383976 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1765,14 +1765,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; union ib_gid gid; - struct ib_gid_attr gid_attr; + struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && attr->ah_attr.ah_flags & IB_AH_GRH; - if (is_eth) { + if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) { int index = attr->ah_attr.grh.sgid_index; status = ib_get_cached_gid(ibqp->device, port_num, -- GitLab From d622b626270ae58c0d40c55a97170ac464c9063a Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 10 Nov 2016 10:16:48 +0200 Subject: [PATCH 0726/1442] IB/IPoIB: Remove can't use GFP_NOIO warning commit 0b59970e7d96edcb3c7f651d9d48e1a59af3c3b0 upstream. Remove the warning print of "can't use of GFP_NOIO" to avoid prints in each QP creation when devices aren't supporting IB_QP_CREATE_USE_GFP_NOIO. This print become more annoying when the IPoIB interface is configured to work in connected mode. Fixes: 09b93088d750 ('IB: Add a QP creation flag to use GFP_NOIO allocations') Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 339a1eecdfe3..81a8080c18b3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1054,8 +1054,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { - ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", - priv->ca->name); attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; tx_qp = ib_create_qp(priv->pd, &attr); } -- GitLab From 567eea578016f181968f76f9e81bc98ecf231ae9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Oct 2016 11:28:32 -0300 Subject: [PATCH 0727/1442] perf trace: Use the syscall raw_syscalls:sys_enter timestamp commit ecf1e2253ea79c6204f4d6a5e756e8fb4aed5a7e upstream. Instead of the one when another syscall takes place while another is being processed (in another CPU, but we show it serialized, so need to "interrupt" the other), and also when finally showing the sys_enter + sys_exit + duration, where we were showing the sample->time for the sys_exit, duh. Before: # perf trace sleep 1 0.373 ( 0.001 ms): close(fd: 3 ) = 0 1000.626 (1000.211 ms): nanosleep(rqtp: 0x7ffd6ddddfb0) = 0 1000.653 ( 0.003 ms): close(fd: 1 ) = 0 1000.657 ( 0.002 ms): close(fd: 2 ) = 0 1000.667 ( 0.000 ms): exit_group( ) # After: # perf trace sleep 1 0.336 ( 0.001 ms): close(fd: 3 ) = 0 0.373 (1000.086 ms): nanosleep(rqtp: 0x7ffe303e9550) = 0 1000.481 ( 0.002 ms): close(fd: 1 ) = 0 1000.485 ( 0.001 ms): close(fd: 2 ) = 0 1000.494 ( 0.000 ms): exit_group( ) [root@jouet linux]# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ecbzgmu2ni6glc6zkw8p1zmx@git.kernel.org Fixes: 752fde44fd1c ("perf trace: Support interrupted syscalls") Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c298bd3e1d90..21f8a81797a0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1452,7 +1452,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp duration = sample->time - ttrace->entry_time; - printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1499,7 +1499,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { - trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output); fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { @@ -1592,7 +1592,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (trace->summary_only) goto out; - trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output); if (ttrace->entry_pending) { fprintf(trace->output, "%-70s", ttrace->entry_str); -- GitLab From 8b5484fab145d4c03c8afff7223d687668abf5d9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Dec 2016 11:35:39 +0100 Subject: [PATCH 0728/1442] perf mem: Fix --all-user/--all-kernel options commit 631ac41b46d293fb3ee43a809776c1663de8d9c6 upstream. Removing extra '--' prefix. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: ad16511b0e40 ("perf mem: Add -U/-K (--all-user/--all-kernel) options") Link: http://lkml.kernel.org/r/1481538943-21874-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d1ce29be560e..cd7bc4d104e2 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -70,8 +70,8 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), - OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), + OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; -- GitLab From 0f242c0ae0a6d414bab8aa7aa6ad0e55fb669bb0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Dec 2016 20:56:54 +0100 Subject: [PATCH 0729/1442] perf trace: Check if MAP_32BIT is defined (again) commit 2bd42f3aaa53ebe78b9be6f898b7945dd61f9773 upstream. There might be systems where MAP_32BIT is not defined, like some some RHEL7 powerpc versions. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Kyle McMartin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 256763b01741 ("perf trace beauty mmap: Add more conditional defines") Link: http://lkml.kernel.org/r/1481831814-23683-1-git-send-email-jolsa@kernel.org [ Changed the Fixme cset to the one removing the conditional switch case for MAP_32BIT ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/trace/beauty/mmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index fd710ab33684..af1cfde6b97b 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -42,7 +42,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(SHARED); P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT P_MMAP_FLAG(32BIT); +#endif P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(EXECUTABLE); -- GitLab From a1d308914090d689973af1b4cb8d0141a91d7a46 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 13 Dec 2016 10:29:44 -0500 Subject: [PATCH 0730/1442] perf diff: Do not overwrite valid build id commit ed6c166cc7dc329736cace3affd2df984fb22ec8 upstream. Fixes a perf diff regression issue which was introduced by commit 5baecbcd9c9a ("perf symbols: we can now read separate debug-info files based on a build ID") The binary name could be same when perf diff different binaries. Build id is used to distinguish between them. However, the previous patch assumes the same binary name has same build id. So it overwrites the build id according to the binary name, regardless of whether the build id is set or not. Check the has_build_id in dso__load. If the build id is already set, use it. Before the fix: $ perf diff 1.perf.data 2.perf.data # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... ................ ............................. # 99.83% -99.80% tchain_edit [.] f2 0.12% +99.81% tchain_edit [.] f3 0.02% -0.01% [ixgbe] [k] ixgbe_read_reg After the fix: $ perf diff 1.perf.data 2.perf.data # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... ................ ............................. # 99.83% +0.10% tchain_edit [.] f3 0.12% -0.08% tchain_edit [.] f2 Signed-off-by: Kan Liang Cc: Andi Kleen CC: Dima Kogan Cc: Jiri Olsa Cc: Namhyung Kim Fixes: 5baecbcd9c9a ("perf symbols: we can now read separate debug-info files based on a build ID") Link: http://lkml.kernel.org/r/1481642984-13593-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/symbol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index aecff69a510d..f7b35e178582 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1459,7 +1459,8 @@ int dso__load(struct dso *dso, struct map *map) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (is_regular_file(dso->long_name) && + if (!dso->has_build_id && + is_regular_file(dso->long_name) && filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); -- GitLab From ea738532666a465d0a7fc0546e35d6acb9242a29 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 10 Aug 2016 15:52:28 +0200 Subject: [PATCH 0731/1442] perf callchain: Fixup help/config for no-unwinding commit c56cb33b56c13493eeb95612f80e4dd6e35cd109 upstream. Since 841e3558b2d ("perf callchain: Recording 'dwarf' callchains do not need DWARF unwinding support"), --call-graph dwarf is allowed in 'perf record' even without unwind support. A couple of other places don't reflect this yet though: the help text should list dwarf as a valid record mode and the dump_size config should be respected too. Signed-off-by: Rabin Vincent Cc: He Kuang Fixes: 841e3558b2de ("perf callchain: Recording 'dwarf' callchains do not need DWARF unwinding support") Link: http://lkml.kernel.org/r/1470837148-7642-1-git-send-email-rabin.vincent@axis.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/callchain.c | 2 -- tools/perf/util/callchain.h | 4 ---- 2 files changed, 6 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 07fd30bc2f81..ae58b493af45 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -193,7 +193,6 @@ int perf_callchain_config(const char *var, const char *value) if (!strcmp(var, "record-mode")) return parse_callchain_record_opt(value, &callchain_param); -#ifdef HAVE_DWARF_UNWIND_SUPPORT if (!strcmp(var, "dump-size")) { unsigned long size = 0; int ret; @@ -203,7 +202,6 @@ int perf_callchain_config(const char *var, const char *value) return ret; } -#endif if (!strcmp(var, "print-type")) return parse_callchain_mode(value); if (!strcmp(var, "order")) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 13e75549c440..47cfd1080975 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,11 +11,7 @@ #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" -#ifdef HAVE_DWARF_UNWIND_SUPPORT # define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n" -#else -# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n" -#endif #define RECORD_SIZE_HELP \ HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording ()\n" \ -- GitLab From 3ef6a3c0136b2005ff887e777f51200938ac3214 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Oct 2016 17:20:47 -0300 Subject: [PATCH 0732/1442] perf scripting: Avoid leaking the scripting_context variable commit cf346d5bd4b9d61656df2f72565c9b354ef3ca0d upstream. Both register_perl_scripting() and register_python_scripting() allocate this variable, fix it by checking if it already was. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tom Zanussi Cc: Wang Nan Fixes: 7e4b21b84c43 ("perf/scripts: Add Python scripting engine") Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/trace-event-scripting.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 9df61059a85d..a2fd6e79d5a5 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -95,7 +95,8 @@ static void register_python_scripting(struct scripting_ops *scripting_ops) if (err) die("error registering py script extension"); - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); } #ifdef NO_LIBPYTHON @@ -159,7 +160,8 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops) if (err) die("error registering pl script extension"); - scripting_context = malloc(sizeof(struct scripting_context)); + if (scripting_context == NULL) + scripting_context = malloc(sizeof(*scripting_context)); } #ifdef NO_LIBPERL -- GitLab From 03f5be20ec9befe477e9978bdba2b1f4f2ca9e42 Mon Sep 17 00:00:00 2001 From: Maciej Debski Date: Thu, 13 Oct 2016 03:59:36 -0700 Subject: [PATCH 0733/1442] perf jit: Enable jitdump support without dwarf commit 621cb4e7837e39d25a5af5a785ad282cdd2b4ce8 upstream. This patch modifies the build dependencies on the jitdump support in perf. As it stands jitdump was wrongfully made dependent 100% on using DWARF. However, the dwarf dependency, only exist if generating the source line table in genelf_debug.c. The rest of the support does not need DWARF. This patch removes the dependency on DWARF for the entire jitdump support. It keeps it only for the genelf_debug.c support. Signed-off-by: Maciej Debski Reviewed-by: Stephane Eranian Cc: Anton Blanchard Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1476356383-30100-3-git-send-email-eranian@google.com Fixes: e12b202f8fb9 ("perf jitdump: Build only on supported archs") [ Make it build only if NO_LIBELF isn't defined, as jitdump.o will only be built in that case ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/Makefile.config | 2 +- tools/perf/util/Build | 2 +- tools/perf/util/genelf.c | 9 +++++++-- tools/perf/util/genelf.h | 2 ++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 72edf83d76b7..cffdd9cf3ebf 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -366,7 +366,7 @@ ifndef NO_SDT endif ifdef PERF_HAVE_JITDUMP - ifndef NO_DWARF + ifndef NO_LIBELF $(call detected,CONFIG_JITDUMP) CFLAGS += -DHAVE_JITDUMP endif diff --git a/tools/perf/util/Build b/tools/perf/util/Build index eb60e613d795..1dc67efad634 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -120,7 +120,7 @@ libperf-y += demangle-rust.o ifdef CONFIG_JITDUMP libperf-$(CONFIG_LIBELF) += jitdump.o libperf-$(CONFIG_LIBELF) += genelf.o -libperf-$(CONFIG_LIBELF) += genelf_debug.o +libperf-$(CONFIG_DWARF) += genelf_debug.o endif CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c1ef805c6a8f..14a73acc549c 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -19,7 +19,9 @@ #include #include #include +#ifdef HAVE_DWARF_SUPPORT #include +#endif #include "perf.h" #include "genelf.h" @@ -157,7 +159,7 @@ gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *cod int jit_write_elf(int fd, uint64_t load_addr, const char *sym, const void *code, int csize, - void *debug, int nr_debug_entries) + void *debug __maybe_unused, int nr_debug_entries __maybe_unused) { Elf *e; Elf_Data *d; @@ -386,11 +388,14 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; +#ifdef HAVE_DWARF_SUPPORT if (debug && nr_debug_entries) { retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); if (retval) goto error; - } else { + } else +#endif + { if (elf_update(e, ELF_C_WRITE) < 0) { warnx("elf_update 4 failed"); goto error; diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 2fbeb59c4bdd..5c933ac71451 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -4,8 +4,10 @@ /* genelf.c */ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, const void *code, int csize, void *debug, int nr_debug_entries); +#ifdef HAVE_DWARF_SUPPORT /* genelf_debug.c */ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); +#endif #if defined(__arm__) #define GEN_ELF_ARCH EM_ARM -- GitLab From 97136d1ef47aa2bd5efc4061fbca364ba4e6a138 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 30 Oct 2016 17:22:19 +0000 Subject: [PATCH 0734/1442] ARM: dts: bcm283x: fix typo in mailbox address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7d891a685dd46b925cf25b74ada0280a2531c34f upstream. The address of the mailbox node in the bcm283x.dtsi also has a typo. So fix it accordingly. Signed-off-by: Stefan Wahren Reviewed-by: Andreas Färber Fixes: 05b682b7a3b2 ("ARM: bcm2835: dt: Add the mailbox to the device tree") Signed-off-by: Eric Anholt Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm283x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 46d46d894a44..74dd21b7373c 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -104,7 +104,7 @@ reg = <0x7e104000 0x10>; }; - mailbox: mailbox@7e00b800 { + mailbox: mailbox@7e00b880 { compatible = "brcm,bcm2835-mbox"; reg = <0x7e00b880 0x40>; interrupts = <0 1>; -- GitLab From 57a10f29e0b3b407336293139459012259885ce2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 7 Nov 2016 20:10:04 +0100 Subject: [PATCH 0735/1442] ARM: dts: r8a7794: Use SYSC "always-on" PM Domain for sound commit 24b2d930a50662c11918fd0c22931f1448488da4 upstream. Hook up the Audio-DMAC and sound device nodes to the SYSC "always-on" PM Domain, for a more consistent device-power-area description in DT. Cfr. commit 0761ff2ad0c581f3 ("ARM: dts: r8a7794: Add SYSC PM Domains"). Fixes: 320d6c5a08a4abd3 ("ARM: dts: r8a7794: add sound support") Fixes: 298e4ee3d213a076 ("ARM: dts: r8a7794: add Audio-DMAC support") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/r8a7794.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi index 725ecb3c5fb4..a9368010fb8d 100644 --- a/arch/arm/boot/dts/r8a7794.dtsi +++ b/arch/arm/boot/dts/r8a7794.dtsi @@ -319,7 +319,7 @@ "ch12"; clocks = <&mstp5_clks R8A7794_CLK_AUDIO_DMAC0>; clock-names = "fck"; - power-domains = <&cpg_clocks>; + power-domains = <&sysc R8A7794_PD_ALWAYS_ON>; #dma-cells = <1>; dma-channels = <13>; }; @@ -1483,7 +1483,7 @@ "mix.0", "mix.1", "dvc.0", "dvc.1", "clk_a", "clk_b", "clk_c", "clk_i"; - power-domains = <&cpg_clocks>; + power-domains = <&sysc R8A7794_PD_ALWAYS_ON>; status = "disabled"; -- GitLab From 387812143cbede75658d267ad224e653b83b498b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 30 Oct 2016 00:31:27 +0300 Subject: [PATCH 0736/1442] ARM: dts: r8a7794: remove Z clock commit 68cc085a4daaa32f7138de1e918331c05165a484 upstream. R8A7794 doesn't have Cortex-A15 CPUs, thus there's no Z clock... Fixes: 0dce5454d5c2 ("ARM: shmobile: Initial r8a7794 SoC device tree") Signed-off-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/r8a7794.dtsi | 3 +-- include/dt-bindings/clock/r8a7794-clock.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi index a9368010fb8d..7e860d3737ff 100644 --- a/arch/arm/boot/dts/r8a7794.dtsi +++ b/arch/arm/boot/dts/r8a7794.dtsi @@ -1025,8 +1025,7 @@ clocks = <&extal_clk &usb_extal_clk>; #clock-cells = <1>; clock-output-names = "main", "pll0", "pll1", "pll3", - "lb", "qspi", "sdh", "sd0", "z", - "rcan"; + "lb", "qspi", "sdh", "sd0", "rcan"; #power-domain-cells = <0>; }; /* Variable factor clocks */ diff --git a/include/dt-bindings/clock/r8a7794-clock.h b/include/dt-bindings/clock/r8a7794-clock.h index 9d02f5317c7c..88e64846cf37 100644 --- a/include/dt-bindings/clock/r8a7794-clock.h +++ b/include/dt-bindings/clock/r8a7794-clock.h @@ -20,8 +20,7 @@ #define R8A7794_CLK_QSPI 5 #define R8A7794_CLK_SDH 6 #define R8A7794_CLK_SD0 7 -#define R8A7794_CLK_Z 8 -#define R8A7794_CLK_RCAN 9 +#define R8A7794_CLK_RCAN 8 /* MSTP0 */ #define R8A7794_CLK_MSIOF0 0 -- GitLab From e95bd2ca11a9b680a46ac4af6167d5299753a1f5 Mon Sep 17 00:00:00 2001 From: Christopher Spinrath Date: Fri, 11 Nov 2016 16:59:38 +0100 Subject: [PATCH 0737/1442] ARM: dts: imx6q-cm-fx6: fix fec pinctrl commit 72649a46067903d00f46e2ebef6543768224f1a0 upstream. According to the schematics of CompuLab's sbc-fx6 baseboard and the vendor devicetree GPIO_16 is *not* muxed to ENET_REF_CLK but to SPDIF_IN. Remove the wrong pinctrl setting. Fixes: 682d055e6ac5 ("ARM: dts: Add initial support for cm-fx6.") Signed-off-by: Christopher Spinrath Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6q-cm-fx6.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6q-cm-fx6.dts b/arch/arm/boot/dts/imx6q-cm-fx6.dts index 59bc5a4dce17..a150bca84daa 100644 --- a/arch/arm/boot/dts/imx6q-cm-fx6.dts +++ b/arch/arm/boot/dts/imx6q-cm-fx6.dts @@ -183,7 +183,6 @@ MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK 0x1b0b0 MX6QDL_PAD_ENET_MDIO__ENET_MDIO 0x1b0b0 MX6QDL_PAD_ENET_MDC__ENET_MDC 0x1b0b0 - MX6QDL_PAD_GPIO_16__ENET_REF_CLK 0x4001b0a8 >; }; -- GitLab From 979f63227c98004635b6c329749618372a3ff4bf Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 26 Sep 2016 03:03:40 +0300 Subject: [PATCH 0738/1442] ARM: dts: imx31: fix clock control module interrupts description commit 2e575cbc930901718cc18e084566ecbb9a4b5ebb upstream. The type of AVIC interrupt controller found on i.MX31 is one-cell, namely 31 for CCM DVFS and 53 for CCM, however for clock control module its interrupts are specified as 3-cells, fix it. Fixes: ef0e4a606fb6 ("ARM: mx31: Replace clk_register_clkdev with clock DT lookup") Acked-by: Rob Herring Signed-off-by: Vladimir Zapolskiy Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/clock/imx31-clock.txt | 2 +- arch/arm/boot/dts/imx31.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/imx31-clock.txt b/Documentation/devicetree/bindings/clock/imx31-clock.txt index 19df842c694f..8163d565f697 100644 --- a/Documentation/devicetree/bindings/clock/imx31-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx31-clock.txt @@ -77,7 +77,7 @@ Examples: clks: ccm@53f80000{ compatible = "fsl,imx31-ccm"; reg = <0x53f80000 0x4000>; - interrupts = <0 31 0x04 0 53 0x04>; + interrupts = <31>, <53>; #clock-cells = <1>; }; diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index 1ce7ae94e7ad..3e0893ea2581 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -122,7 +122,7 @@ clks: ccm@53f80000{ compatible = "fsl,imx31-ccm"; reg = <0x53f80000 0x4000>; - interrupts = <0 31 0x04 0 53 0x04>; + interrupts = <31>, <53>; #clock-cells = <1>; }; }; -- GitLab From 173470b1b10a853a0b11cbf363c8b50087aa3962 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 26 Sep 2016 03:03:41 +0300 Subject: [PATCH 0739/1442] ARM: dts: imx31: move CCM device node to AIPS2 bus devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1f87aee6a2e55eda466a43ba6248a8b75eede153 upstream. i.MX31 Clock Control Module controller is found on AIPS2 bus, move it there from SPBA bus to avoid a conflict of device IO space mismatch. Fixes: ef0e4a606fb6 ("ARM: mx31: Replace clk_register_clkdev with clock DT lookup") Signed-off-by: Vladimir Zapolskiy Acked-by: Uwe Kleine-König Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx31.dtsi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index 3e0893ea2581..8d4c0e3533fa 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -118,13 +118,6 @@ interrupts = <19>; clocks = <&clks 25>; }; - - clks: ccm@53f80000{ - compatible = "fsl,imx31-ccm"; - reg = <0x53f80000 0x4000>; - interrupts = <31>, <53>; - #clock-cells = <1>; - }; }; aips@53f00000 { /* AIPS2 */ @@ -134,6 +127,13 @@ reg = <0x53f00000 0x100000>; ranges; + clks: ccm@53f80000{ + compatible = "fsl,imx31-ccm"; + reg = <0x53f80000 0x4000>; + interrupts = <31>, <53>; + #clock-cells = <1>; + }; + gpt: timer@53f90000 { compatible = "fsl,imx31-gpt"; reg = <0x53f90000 0x4000>; -- GitLab From efba7a37efe28ea53ccecbb377983d11a938df10 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 17 Nov 2016 03:30:51 +0200 Subject: [PATCH 0740/1442] ARM: dts: imx31: fix AVIC base address commit af92305e567b7f4c9cf48b9e46c1f48ec9ffb1fb upstream. On i.MX31 AVIC interrupt controller base address is at 0x68000000. The problem was shadowed by the AVIC driver, which takes the correct base address from a SoC specific header file. Fixes: d2a37b3d91f4 ("ARM i.MX31: Add devicetree support") Signed-off-by: Vladimir Zapolskiy Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx31.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi index 8d4c0e3533fa..11e9e6bd8abb 100644 --- a/arch/arm/boot/dts/imx31.dtsi +++ b/arch/arm/boot/dts/imx31.dtsi @@ -30,11 +30,11 @@ }; }; - avic: avic-interrupt-controller@60000000 { + avic: interrupt-controller@68000000 { compatible = "fsl,imx31-avic", "fsl,avic"; interrupt-controller; #interrupt-cells = <1>; - reg = <0x60000000 0x100000>; + reg = <0x68000000 0x100000>; }; soc { -- GitLab From 9170948a24b7ad73ea6511c5d37984f9475b699f Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 3 Dec 2016 04:57:23 -0600 Subject: [PATCH 0741/1442] ARM: dts: omap3: Add DTS for Logic PD SOM-LV 37xx Dev Kit commit 7245f67f86e847769f41dacad26bb8f5b5d74bf4 upstream. Fixes: ("ab8dd3aed011 ARM: DTS: Add minimal Support for Logic PD DM3730 SOM-LV") This adds the dts file into the Makefile. This should have been included in the original patch. V2: Update patch description - same source code V1: Original patch Reviewed-by: Javier Martinez Canillas Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index c558ba75cbcc..7037201c5e3a 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -485,6 +485,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \ am3517-evm.dtb \ am3517_mt_ventoux.dtb \ logicpd-torpedo-37xx-devkit.dtb \ + logicpd-som-lv-37xx-devkit.dtb \ omap3430-sdp.dtb \ omap3-beagle.dtb \ omap3-beagle-xm.dtb \ -- GitLab From 782b361c93062f083bbc9a78928498218f950399 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 9 Jan 2017 09:34:48 +0800 Subject: [PATCH 0742/1442] tmpfs: clear S_ISGID when setting posix ACLs commit 497de07d89c1410d76a15bec2bb41f24a2a89f31 upstream. This change was missed the tmpfs modification in In CVE-2016-7097 commit 073931017b49 ("posix_acl: Clear SGID bit when setting file permissions") It can test by xfstest generic/375, which failed to clear setgid bit in the following test case on tmpfs: touch $testfile chown 100:100 $testfile chmod 2755 $testfile _runas -u 100 -g 101 -- setfacl -m u::rwx,g::rwx,o::rwx $testfile Signed-off-by: Gu Zheng Signed-off-by: Al Viro Cc: Brad Spengler Signed-off-by: Greg Kroah-Hartman --- fs/posix_acl.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 595522022aca..c9d48dc78495 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -922,11 +922,10 @@ int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type) int error; if (type == ACL_TYPE_ACCESS) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return 0; - if (error == 0) - acl = NULL; + error = posix_acl_update_mode(inode, + &inode->i_mode, &acl); + if (error) + return error; } inode->i_ctime = current_time(inode); -- GitLab From f779e00d2e86df2c9409c7cc8c054113c5086d1b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 28 Dec 2016 14:55:16 -0600 Subject: [PATCH 0743/1442] x86/PCI: Ignore _CRS on Supermicro X8DTH-i/6/iF/6F commit 89e9f7bcd8744ea25fcf0ac671b8d72c10d7d790 upstream. Martin reported that the Supermicro X8DTH-i/6/iF/6F advertises incorrect host bridge windows via _CRS: pci_root PNP0A08:00: host bridge window [io 0xf000-0xffff] pci_root PNP0A08:01: host bridge window [io 0xf000-0xffff] Both bridges advertise the 0xf000-0xffff window, which cannot be correct. Work around this by ignoring _CRS on this system. The downside is that we may not assign resources correctly to hot-added PCI devices (if they are possible on this system). Link: https://bugzilla.kernel.org/show_bug.cgi?id=42606 Reported-by: Martin Burnicki Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/acpi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 3cd69832d7f4..3961103e9176 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -114,6 +114,16 @@ static const struct dmi_system_id pci_crs_quirks[] __initconst = { DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), }, }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=42606 */ + { + .callback = set_nouse_crs, + .ident = "Supermicro X8DTH", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_PRODUCT_NAME, "X8DTH-i/6/iF/6F"), + DMI_MATCH(DMI_BIOS_VERSION, "2.0a"), + }, + }, /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */ { -- GitLab From bdeaa468e22171497b289c2f31d20ab6b318d53c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Jan 2017 23:23:15 -0800 Subject: [PATCH 0744/1442] rcu: Remove cond_resched() from Tiny synchronize_sched() commit f466ae66fa6a599f9a53b5f9bafea4b8cfffa7fb upstream. It is now legal to invoke synchronize_sched() at early boot, which causes Tiny RCU's synchronize_sched() to emit spurious splats. This commit therefore removes the cond_resched() from Tiny RCU's synchronize_sched(). Fixes: 8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue") Signed-off-by: Paul E. McKenney Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tiny.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 1898559e6b60..b23a4d076f3d 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -185,9 +185,6 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused * benefits of doing might_sleep() to reduce latency.) * * Cool, huh? (Due to Josh Triplett.) - * - * But we want to make this a static inline later. The cond_resched() - * currently makes this problematic. */ void synchronize_sched(void) { @@ -195,7 +192,6 @@ void synchronize_sched(void) lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_sched() in RCU read-side critical section"); - cond_resched(); } EXPORT_SYMBOL_GPL(synchronize_sched); -- GitLab From 90687fc3c8c386a16326089d68cf616b8049440f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Jan 2017 02:28:26 -0800 Subject: [PATCH 0745/1442] rcu: Narrow early boot window of illegal synchronous grace periods commit 52d7e48b86fc108e45a656d8e53e4237993c481d upstream. The current preemptible RCU implementation goes through three phases during bootup. In the first phase, there is only one CPU that is running with preemption disabled, so that a no-op is a synchronous grace period. In the second mid-boot phase, the scheduler is running, but RCU has not yet gotten its kthreads spawned (and, for expedited grace periods, workqueues are not yet running. During this time, any attempt to do a synchronous grace period will hang the system (or complain bitterly, depending). In the third and final phase, RCU is fully operational and everything works normally. This has been OK for some time, but there has recently been some synchronous grace periods showing up during the second mid-boot phase. This code worked "by accident" for awhile, but started failing as soon as expedited RCU grace periods switched over to workqueues in commit 8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue"). Note that the code was buggy even before this commit, as it was subject to failure on real-time systems that forced all expedited grace periods to run as normal grace periods (for example, using the rcu_normal ksysfs parameter). The callchain from the failure case is as follows: early_amd_iommu_init() |-> acpi_put_table(ivrs_base); |-> acpi_tb_put_table(table_desc); |-> acpi_tb_invalidate_table(table_desc); |-> acpi_tb_release_table(...) |-> acpi_os_unmap_memory |-> acpi_os_unmap_iomem |-> acpi_os_map_cleanup |-> synchronize_rcu_expedited The kernel showing this callchain was built with CONFIG_PREEMPT_RCU=y, which caused the code to try using workqueues before they were initialized, which did not go well. This commit therefore reworks RCU to permit synchronous grace periods to proceed during this mid-boot phase. This commit is therefore a fix to a regression introduced in v4.9, and is therefore being put forward post-merge-window in v4.10. This commit sets a flag from the existing rcu_scheduler_starting() function which causes all synchronous grace periods to take the expedited path. The expedited path now checks this flag, using the requesting task to drive the expedited grace period forward during the mid-boot phase. Finally, this flag is updated by a core_initcall() function named rcu_exp_runtime_mode(), which causes the runtime codepaths to be used. Note that this arrangement assumes that tasks are not sent POSIX signals (or anything similar) from the time that the first task is spawned through core_initcall() time. Fixes: 8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue") Reported-by: "Zheng, Lv" Reported-by: Borislav Petkov Signed-off-by: Paul E. McKenney Tested-by: Stan Kain Tested-by: Ivan Tested-by: Emanuel Castelo Tested-by: Bruno Pesavento Tested-by: Borislav Petkov Tested-by: Frederic Bezies Signed-off-by: Greg Kroah-Hartman --- include/linux/rcupdate.h | 4 ++++ kernel/rcu/rcu.h | 1 + kernel/rcu/tiny_plugin.h | 9 +++++-- kernel/rcu/tree.c | 33 +++++++++++++++---------- kernel/rcu/tree_exp.h | 52 +++++++++++++++++++++++++++++++--------- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 38 ++++++++++++++++++++++------- 7 files changed, 104 insertions(+), 35 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 321f9ed552a9..01f71e1d2e94 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -444,6 +444,10 @@ bool __rcu_is_watching(void); #error "Unknown RCU implementation specified to kernel configuration" #endif +#define RCU_SCHEDULER_INACTIVE 0 +#define RCU_SCHEDULER_INIT 1 +#define RCU_SCHEDULER_RUNNING 2 + /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 80adef7d4c3d..0d6ff3e471be 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -136,6 +136,7 @@ int rcu_jiffies_till_stall_check(void); #define TPS(x) tracepoint_string(x) void rcu_early_boot_tests(void); +void rcu_test_sync_prims(void); /* * This function really isn't for public consumption, but RCU is special in diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 196f0302e2f4..c64b827ecbca 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -60,12 +60,17 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); /* * During boot, we forgive RCU lockdep issues. After this function is - * invoked, we start taking RCU lockdep issues seriously. + * invoked, we start taking RCU lockdep issues seriously. Note that unlike + * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE + * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. + * The reason for this is that Tiny RCU does not need kthreads, so does + * not have to care about the fact that the scheduler is half-initialized + * at a certain phase of the boot process. */ void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 69a5611a7e7c..10f62c6f48e7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -127,13 +127,16 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ int sysctl_panic_on_rcu_stall __read_mostly; /* - * The rcu_scheduler_active variable transitions from zero to one just - * before the first task is spawned. So when this variable is zero, RCU - * can assume that there is but one task, allowing RCU to (for example) + * The rcu_scheduler_active variable is initialized to the value + * RCU_SCHEDULER_INACTIVE and transitions RCU_SCHEDULER_INIT just before the + * first task is spawned. So when this variable is RCU_SCHEDULER_INACTIVE, + * RCU can assume that there is but one task, allowing RCU to (for example) * optimize synchronize_rcu() to a simple barrier(). When this variable - * is one, RCU must actually do all the hard work required to detect real - * grace periods. This variable is also used to suppress boot-time false - * positives from lockdep-RCU error checking. + * is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required + * to detect real grace periods. This variable is also used to suppress + * boot-time false positives from lockdep-RCU error checking. Finally, it + * transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU + * is fully initialized, including all of its kthreads having been spawned. */ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); @@ -3985,18 +3988,22 @@ static int __init rcu_spawn_gp_kthread(void) early_initcall(rcu_spawn_gp_kthread); /* - * This function is invoked towards the end of the scheduler's initialization - * process. Before this is called, the idle task might contain - * RCU read-side critical sections (during which time, this idle - * task is booting the system). After this function is called, the - * idle tasks are prohibited from containing RCU read-side critical - * sections. This function also enables RCU lockdep checking. + * This function is invoked towards the end of the scheduler's + * initialization process. Before this is called, the idle task might + * contain synchronous grace-period primitives (during which time, this idle + * task is booting the system, and such primitives are no-ops). After this + * function is called, any synchronous grace-period primitives are run as + * expedited, with the requesting task driving the grace period forward. + * A later core_initcall() rcu_exp_runtime_mode() will switch to full + * runtime RCU functionality. */ void rcu_scheduler_starting(void) { WARN_ON(num_online_cpus() != 1); WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_INIT; + rcu_test_sync_prims(); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 24343eb87b58..78eba4120d46 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -521,6 +521,20 @@ struct rcu_exp_work { struct work_struct rew_work; }; +/* + * Common code to drive an expedited grace period forward, used by + * workqueues and mid-boot-time tasks. + */ +static void rcu_exp_sel_wait_wake(struct rcu_state *rsp, + smp_call_func_t func, unsigned long s) +{ + /* Initialize the rcu_node tree in preparation for the wait. */ + sync_rcu_exp_select_cpus(rsp, func); + + /* Wait and clean up, including waking everyone. */ + rcu_exp_wait_wake(rsp, s); +} + /* * Work-queue handler to drive an expedited grace period forward. */ @@ -528,12 +542,8 @@ static void wait_rcu_exp_gp(struct work_struct *wp) { struct rcu_exp_work *rewp; - /* Initialize the rcu_node tree in preparation for the wait. */ rewp = container_of(wp, struct rcu_exp_work, rew_work); - sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func); - - /* Wait and clean up, including waking everyone. */ - rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s); + rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s); } /* @@ -559,12 +569,18 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ - /* Marshall arguments and schedule the expedited grace period. */ - rew.rew_func = func; - rew.rew_rsp = rsp; - rew.rew_s = s; - INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); - schedule_work(&rew.rew_work); + /* Ensure that load happens before action based on it. */ + if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) { + /* Direct call during scheduler init and early_initcalls(). */ + rcu_exp_sel_wait_wake(rsp, func, s); + } else { + /* Marshall arguments & schedule the expedited grace period. */ + rew.rew_func = func; + rew.rew_rsp = rsp; + rew.rew_s = s; + INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); + schedule_work(&rew.rew_work); + } /* Wait for expedited grace period to complete. */ rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); @@ -666,6 +682,8 @@ void synchronize_rcu_expedited(void) { struct rcu_state *rsp = rcu_state_p; + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) + return; _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); @@ -683,3 +701,15 @@ void synchronize_rcu_expedited(void) EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/* + * Switch to run-time mode once Tree RCU has fully initialized. + */ +static int __init rcu_exp_runtime_mode(void) +{ + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; + rcu_test_sync_prims(); + return 0; +} +core_initcall(rcu_exp_runtime_mode); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 85c5a883c6e3..56583e764ebf 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -670,7 +670,7 @@ void synchronize_rcu(void) lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu() in RCU read-side critical section"); - if (!rcu_scheduler_active) + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f19271dce0a9..4f6db7e6a117 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -121,11 +121,14 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); * Should expedited grace-period primitives always fall back to their * non-expedited counterparts? Intended for use within RCU. Note * that if the user specifies both rcu_expedited and rcu_normal, then - * rcu_normal wins. + * rcu_normal wins. (Except during the time period during boot from + * when the first task is spawned until the rcu_exp_runtime_mode() + * core_initcall() is invoked, at which point everything is expedited.) */ bool rcu_gp_is_normal(void) { - return READ_ONCE(rcu_normal); + return READ_ONCE(rcu_normal) && + rcu_scheduler_active != RCU_SCHEDULER_INIT; } EXPORT_SYMBOL_GPL(rcu_gp_is_normal); @@ -135,13 +138,14 @@ static atomic_t rcu_expedited_nesting = /* * Should normal grace-period primitives be expedited? Intended for * use within RCU. Note that this function takes the rcu_expedited - * sysfs/boot variable into account as well as the rcu_expedite_gp() - * nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited() - * returns false is a -really- bad idea. + * sysfs/boot variable and rcu_scheduler_active into account as well + * as the rcu_expedite_gp() nesting. So looping on rcu_unexpedite_gp() + * until rcu_gp_is_expedited() returns false is a -really- bad idea. */ bool rcu_gp_is_expedited(void) { - return rcu_expedited || atomic_read(&rcu_expedited_nesting); + return rcu_expedited || atomic_read(&rcu_expedited_nesting) || + rcu_scheduler_active == RCU_SCHEDULER_INIT; } EXPORT_SYMBOL_GPL(rcu_gp_is_expedited); @@ -257,7 +261,7 @@ EXPORT_SYMBOL_GPL(rcu_callback_map); int notrace debug_lockdep_rcu_enabled(void) { - return rcu_scheduler_active && debug_locks && + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks && current->lockdep_recursion == 0; } EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); @@ -591,7 +595,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks); void synchronize_rcu_tasks(void) { /* Complain if the scheduler has not started. */ - RCU_LOCKDEP_WARN(!rcu_scheduler_active, + RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, "synchronize_rcu_tasks called too soon"); /* Wait for the grace period. */ @@ -813,6 +817,23 @@ static void rcu_spawn_tasks_kthread(void) #endif /* #ifdef CONFIG_TASKS_RCU */ +/* + * Test each non-SRCU synchronous grace-period wait API. This is + * useful just after a change in mode for these primitives, and + * during early boot. + */ +void rcu_test_sync_prims(void) +{ + if (!IS_ENABLED(CONFIG_PROVE_RCU)) + return; + synchronize_rcu(); + synchronize_rcu_bh(); + synchronize_sched(); + synchronize_rcu_expedited(); + synchronize_rcu_bh_expedited(); + synchronize_sched_expedited(); +} + #ifdef CONFIG_PROVE_RCU /* @@ -865,6 +886,7 @@ void rcu_early_boot_tests(void) early_boot_test_call_rcu_bh(); if (rcu_self_test_sched) early_boot_test_call_rcu_sched(); + rcu_test_sync_prims(); } static int rcu_verify_early_boot_tests(void) -- GitLab From a297ed84b92846963b4a7c5efd90910cea9c39a5 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 5 Jan 2017 16:34:51 -0500 Subject: [PATCH 0746/1442] sunrpc: don't call sleeping functions from the notifier block callbacks commit 546125d1614264d26080817d0c8cddb9b25081fa upstream. The inet6addr_chain is an atomic notifier chain, so we can't call anything that might sleep (like lock_sock)... instead of closing the socket from svc_age_temp_xprts_now (which is called by the notifier function), just have the rpc service threads do it instead. Fixes: c3d4879e01be "sunrpc: Add a function to close..." Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index e5d193440374..7440290f64ac 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -66,6 +66,7 @@ struct svc_xprt { #define XPT_LISTENER 10 /* listening endpoint */ #define XPT_CACHE_AUTH 11 /* cache auth info */ #define XPT_LOCAL 12 /* connection from loopback interface */ +#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3bc1d61694cb..9c9db55a0c1e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -799,6 +799,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); + if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags)) + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_delete_xprt(xprt); /* Leave XPT_BUSY set on the dead xprt: */ goto out; @@ -1020,9 +1022,11 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) le = to_be_closed.next; list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - xprt->xpt_ops->xpo_kill_temp_xprt(xprt); - svc_close_xprt(xprt); + set_bit(XPT_CLOSE, &xprt->xpt_flags); + set_bit(XPT_KILL_TEMP, &xprt->xpt_flags); + dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n", + xprt); + svc_xprt_enqueue(xprt); } } EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now); -- GitLab From f29f3616b9328e17f902f1839634b2440ca07982 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 9 Jan 2017 17:15:18 -0500 Subject: [PATCH 0747/1442] svcrpc: don't leak contexts on PROC_DESTROY commit 78794d1890708cf94e3961261e52dcec2cc34722 upstream. Context expiry times are in units of seconds since boot, not unix time. The use of get_seconds() here therefore sets the expiry time decades in the future. This prevents timely freeing of contexts destroyed by client RPC_GSS_PROC_DESTROY requests. We'd still free them eventually (when the module is unloaded or the container shut down), but a lot of contexts could pile up before then. Fixes: c5b29f885afe "sunrpc: use seconds since boot in expiry cache" Reported-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 45662d7f0943..6fdffde28733 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1489,7 +1489,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; - rsci->h.expiry_time = get_seconds(); + rsci->h.expiry_time = seconds_since_boot(); set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; -- GitLab From 3c4d83a1a41efdf2c191d0072e6545e6bf8ecc94 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Jan 2017 17:30:49 -0800 Subject: [PATCH 0748/1442] libnvdimm, namespace: fix pmem namespace leak, delete when size set to zero commit 1f19b983a8877f81763fab3e693c6befe212736d upstream. Commit 98a29c39dc68 ("libnvdimm, namespace: allow creation of multiple pmem-namespaces per region") added support for establishing additional pmem namespace beyond the seed device, similar to blk namespaces. However, it neglected to delete the namespace when the size is set to zero. Fixes: 98a29c39dc68 ("libnvdimm, namespace: allow creation of multiple pmem-namespaces per region") Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/nvdimm/namespace_devs.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index abe5c6bc756c..1480734c2d6e 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -957,6 +957,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) { resource_size_t allocated = 0, available = 0; struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns = to_ndns(dev); struct nd_mapping *nd_mapping; struct nvdimm_drvdata *ndd; struct nd_label_id label_id; @@ -964,7 +965,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) u8 *uuid = NULL; int rc, i; - if (dev->driver || to_ndns(dev)->claim) + if (dev->driver || ndns->claim) return -EBUSY; if (is_namespace_pmem(dev)) { @@ -1034,20 +1035,16 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) nd_namespace_pmem_set_resource(nd_region, nspm, val * nd_region->ndr_mappings); - } else if (is_namespace_blk(dev)) { - struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); - - /* - * Try to delete the namespace if we deleted all of its - * allocation, this is not the seed device for the - * region, and it is not actively claimed by a btt - * instance. - */ - if (val == 0 && nd_region->ns_seed != dev - && !nsblk->common.claim) - nd_device_unregister(dev, ND_ASYNC); } + /* + * Try to delete the namespace if we deleted all of its + * allocation, this is not the seed device for the region, and + * it is not actively claimed by a btt instance. + */ + if (val == 0 && nd_region->ns_seed != dev && !ndns->claim) + nd_device_unregister(dev, ND_ASYNC); + return rc; } -- GitLab From bb82fb48df8cab8f902052ce03f6d51b9b8f1bcd Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 16 Jan 2017 10:48:09 -0800 Subject: [PATCH 0749/1442] ARC: module: Fix !CONFIG_ARC_DW2_UNWIND builds commit eb1357d942e5d96de6b4c20a8ffa55acf96233a2 upstream. commit d65283f7b695b5 added mod->arch.secstr under CONFIG_ARC_DW2_UNWIND, but used it unconditionally which broke builds when the option was disabled. Fix that by adjusting the #ifdef guard. And while at it add a missing guard (for unwinder) in module.c as well Reported-by: Waldemar Brodkorb Fixes: d65283f7b695b5 ("ARC: module: elide loop to save reference to .eh_frame") Tested-by: Anton Kolesov Reviewed-by: Alexey Brodkin [abrodkin: provided fixlet to Kconfig per failure in allnoconfig build] Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/Kconfig | 2 +- arch/arc/include/asm/module.h | 4 ++-- arch/arc/kernel/module.c | 4 +++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index bd204bfa29ed..249e10190d20 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -28,7 +28,7 @@ config ARC select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_MEMBLOCK - select HAVE_MOD_ARCH_SPECIFIC if ARC_DW2_UNWIND + select HAVE_MOD_ARCH_SPECIFIC select HAVE_OPROFILE select HAVE_PERF_EVENTS select HANDLE_DOMAIN_IRQ diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h index 6e91d8b339c3..567590ea8f6c 100644 --- a/arch/arc/include/asm/module.h +++ b/arch/arc/include/asm/module.h @@ -14,13 +14,13 @@ #include -#ifdef CONFIG_ARC_DW2_UNWIND struct mod_arch_specific { +#ifdef CONFIG_ARC_DW2_UNWIND void *unw_info; int unw_sec_idx; +#endif const char *secstr; }; -#endif #define MODULE_PROC_FAMILY "ARC700" diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c index 42e964db2967..3d99a6091332 100644 --- a/arch/arc/kernel/module.c +++ b/arch/arc/kernel/module.c @@ -32,8 +32,8 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, #ifdef CONFIG_ARC_DW2_UNWIND mod->arch.unw_sec_idx = 0; mod->arch.unw_info = NULL; - mod->arch.secstr = secstr; #endif + mod->arch.secstr = secstr; return 0; } @@ -113,8 +113,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } +#ifdef CONFIG_ARC_DW2_UNWIND if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0) module->arch.unw_sec_idx = tgtsec; +#endif return 0; -- GitLab From 0181b3603681d03e5753079cef8b17d2905faee5 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 12 Jan 2017 12:04:04 -0800 Subject: [PATCH 0750/1442] fuse: clear FR_PENDING flag when moving requests out of pending queue commit a8a86d78d673b1c99fe9b0064739fde9e9774184 upstream. fuse_abort_conn() moves requests from pending list to a temporary list before canceling them. This operation races with request_wait_answer() which also tries to remove the request after it gets a fatal signal. It checks FR_PENDING flag to determine whether the request is still in the pending list. Make fuse_abort_conn() clear FR_PENDING flag so that request_wait_answer() does not remove the request from temporary list. This bug causes an Oops when trying to delete an already deleted list entry in end_requests(). Fixes: ee314a870e40 ("fuse: abort: no fc->lock needed for request ending") Signed-off-by: Tahsin Erdogan Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 70ea57c7b6bb..4e06a27ed7f8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2025,7 +2025,6 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) struct fuse_req *req; req = list_entry(head->next, struct fuse_req, list); req->out.h.error = -ECONNABORTED; - clear_bit(FR_PENDING, &req->flags); clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); request_end(fc, req); @@ -2103,6 +2102,8 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_lock(&fiq->waitq.lock); fiq->connected = 0; list_splice_init(&fiq->pending, &to_end2); + list_for_each_entry(req, &to_end2, list) + clear_bit(FR_PENDING, &req->flags); while (forget_pending(fiq)) kfree(dequeue_forget(fiq, 1, NULL)); wake_up_all_locked(&fiq->waitq); -- GitLab From 07f0267561c6cbe4a87c9a05134d2070f8b4a2ee Mon Sep 17 00:00:00 2001 From: David Sheets Date: Fri, 13 Jan 2017 15:58:30 +0000 Subject: [PATCH 0751/1442] fuse: fix time_to_jiffies nsec sanity check commit 210675270caa33253e4c33f3c5e657e7d6060812 upstream. Commit bcb6f6d2b9c2 ("fuse: use timespec64") introduced clamped nsec values in time_to_jiffies but used the max of nsec and NSEC_PER_SEC - 1 instead of the min. Because of this, dentries would stay in the cache longer than requested and go stale in scenarios that relied on their timely eviction. Fixes: bcb6f6d2b9c2 ("fuse: use timespec64") Signed-off-by: David Sheets Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 096f79997f75..642c57b8de7b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -68,7 +68,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec) if (sec || nsec) { struct timespec64 ts = { sec, - max_t(u32, nsec, NSEC_PER_SEC - 1) + min_t(u32, nsec, NSEC_PER_SEC - 1) }; return get_jiffies_64() + timespec64_to_jiffies(&ts); -- GitLab From c6bdc450fe93b54c394143a3e564a6730f3350ff Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 4 Jan 2017 14:32:30 -0500 Subject: [PATCH 0752/1442] PCI: designware: Check for iATU unroll only on platforms that use ATU commit a782b5f986c3fa1cfa7f2b57941200c6a5809242 upstream. Previously we checked for iATU unroll support by reading PCIE_ATU_VIEWPORT even on platforms, e.g., Keystone, that do not have ATU ports. This can cause bad behavior such as asynchronous external aborts: OF: PCI: MEM 0x60000000..0x6fffffff -> 0x60000000 Unhandled fault: asynchronous external abort (0x1211) at 0x00000000 pgd = c0003000 [00000000] *pgd=80000800004003, *pmd=00000000 Internal error: : 1211 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-00009-g6ff59d2-dirty #7 Hardware name: Keystone task: eb878000 task.stack: eb866000 PC is at dw_pcie_setup_rc+0x24/0x380 LR is at ks_pcie_host_init+0x10/0x170 Move the dw_pcie_iatu_unroll_enabled() check so we only call it on platforms that do not use the ATU. These platforms supply their own ->rd_other_conf() and ->wr_other_conf() methods. [bhelgaas: changelog] Fixes: a0601a470537 ("PCI: designware: Add iATU Unroll feature") Fixes: 416379f9ebde ("PCI: designware: Check for iATU unroll support after initializing host") Tested-by: Kishon Vijay Abraham I Signed-off-by: Murali Karicheri Signed-off-by: Bjorn Helgaas Acked-By: Joao Pinto Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pcie-designware.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index bed19994c1e9..af8f6e92e885 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -807,11 +807,6 @@ void dw_pcie_setup_rc(struct pcie_port *pp) { u32 val; - /* get iATU unroll support */ - pp->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pp); - dev_dbg(pp->dev, "iATU unroll: %s\n", - pp->iatu_unroll_enabled ? "enabled" : "disabled"); - /* set the number of lanes */ val = dw_pcie_readl_rc(pp, PCIE_PORT_LINK_CONTROL); val &= ~PORT_LINK_MODE_MASK; @@ -882,6 +877,11 @@ void dw_pcie_setup_rc(struct pcie_port *pp) * we should not program the ATU here. */ if (!pp->ops->rd_other_conf) { + /* get iATU unroll support */ + pp->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pp); + dev_dbg(pp->dev, "iATU unroll: %s\n", + pp->iatu_unroll_enabled ? "enabled" : "disabled"); + dw_pcie_prog_outbound_atu(pp, PCIE_ATU_REGION_INDEX0, PCIE_ATU_TYPE_MEM, pp->mem_base, pp->mem_bus_addr, pp->mem_size); -- GitLab From 4922a6a5cfa7ff25622f07860d840e6b9c49f6a1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 11 Jan 2017 09:11:53 -0600 Subject: [PATCH 0753/1442] PCI: Enumerate switches below PCI-to-PCIe bridges commit 51ebfc92b72b4f7dac1ab45683bf56741e454b8c upstream. A PCI-to-PCIe bridge (a "reverse bridge") has a PCI or PCI-X primary interface and a PCI Express secondary interface. The PCIe interface is a Downstream Port that originates a Link. See the "PCI Express to PCI/PCI-X Bridge Specification", rev 1.0, sections 1.2 and A.6. The bug report below involves a PCI-to-PCIe bridge and a PCIe switch below the bridge: 00:1e.0 Intel 82801 PCI Bridge to [bus 01-0a] 01:00.0 Pericom PI7C9X111SL PCIe-to-PCI Reversible Bridge to [bus 02-0a] 02:00.0 Pericom Device 8608 [PCIe Upstream Port] to [bus 03-0a] 03:01.0 Pericom Device 8608 [PCIe Downstream Port] to [bus 0a] 01:00.0 is configured as a PCI-to-PCIe bridge (despite the name printed by lspci). As we traverse a PCIe hierarchy, device connections alternate between PCIe Links and internal Switch logic. Previously we did not recognize that 01:00.0 had a secondary link, so we thought the 02:00.0 Upstream Port *did* have a secondary link. In fact, it's the other way around: 01:00.0 has a secondary link, and 02:00.0 has internal Switch logic on its secondary side. When we thought 02:00.0 had a secondary link, the pci_scan_slot() -> only_one_child() path assumed 02:00.0 could have only one child, so 03:00.0 was the only possible downstream device. But 03:00.0 doesn't exist, so we didn't look for any other devices on bus 03. Booting with "pci=pcie_scan_all" is a workaround, but we don't want users to have to do that. Recognize that PCI-to-PCIe bridges originate links on their secondary interfaces. Link: https://bugzilla.kernel.org/show_bug.cgi?id=189361 Fixes: d0751b98dfa3 ("PCI: Add dev->has_secondary_link to track downstream PCIe links") Tested-by: Blake Moore Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 104c46d53121..300770cdc084 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1050,6 +1050,7 @@ void set_pcie_port_type(struct pci_dev *pdev) pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); if (!pos) return; + pdev->pcie_cap = pos; pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); pdev->pcie_flags_reg = reg16; @@ -1057,13 +1058,14 @@ void set_pcie_port_type(struct pci_dev *pdev) pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; /* - * A Root Port is always the upstream end of a Link. No PCIe - * component has two Links. Two Links are connected by a Switch - * that has a Port on each Link and internal logic to connect the - * two Ports. + * A Root Port or a PCI-to-PCIe bridge is always the upstream end + * of a Link. No PCIe component has two Links. Two Links are + * connected by a Switch that has a Port on each Link and internal + * logic to connect the two Ports. */ type = pci_pcie_type(pdev); - if (type == PCI_EXP_TYPE_ROOT_PORT) + if (type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_PCIE_BRIDGE) pdev->has_secondary_link = 1; else if (type == PCI_EXP_TYPE_UPSTREAM || type == PCI_EXP_TYPE_DOWNSTREAM) { -- GitLab From 63df1ccb937f67fbefa0a77415b3a4d10434a09b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Jan 2017 18:17:42 +0100 Subject: [PATCH 0754/1442] HID: corsair: fix DMA buffers on stack commit 6d104af38b570d37aa32a5803b04c354f8ed513d upstream. Not all platforms support DMA to the stack, and specifically since v4.9 this is no longer supported on x86 with VMAP_STACK either. Note that the macro-mode buffer was larger than necessary. Fixes: 6f78193ee9ea ("HID: corsair: Add Corsair Vengeance K90 driver") Signed-off-by: Johan Hovold Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-corsair.c | 54 ++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index 717704e9ae07..5971907a23b1 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -148,7 +148,11 @@ static enum led_brightness k90_backlight_get(struct led_classdev *led_cdev) struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); int brightness; - char data[8]; + char *data; + + data = kmalloc(8, GFP_KERNEL); + if (!data) + return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_STATUS, @@ -158,16 +162,22 @@ static enum led_brightness k90_backlight_get(struct led_classdev *led_cdev) if (ret < 0) { dev_warn(dev, "Failed to get K90 initial state (error %d).\n", ret); - return -EIO; + ret = -EIO; + goto out; } brightness = data[4]; if (brightness < 0 || brightness > 3) { dev_warn(dev, "Read invalid backlight brightness: %02hhx.\n", data[4]); - return -EIO; + ret = -EIO; + goto out; } - return brightness; + ret = brightness; +out: + kfree(data); + + return ret; } static enum led_brightness k90_record_led_get(struct led_classdev *led_cdev) @@ -253,7 +263,11 @@ static ssize_t k90_show_macro_mode(struct device *dev, struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); const char *macro_mode; - char data[8]; + char *data; + + data = kmalloc(2, GFP_KERNEL); + if (!data) + return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_GET_MODE, @@ -263,7 +277,8 @@ static ssize_t k90_show_macro_mode(struct device *dev, if (ret < 0) { dev_warn(dev, "Failed to get K90 initial mode (error %d).\n", ret); - return -EIO; + ret = -EIO; + goto out; } switch (data[0]) { @@ -277,10 +292,15 @@ static ssize_t k90_show_macro_mode(struct device *dev, default: dev_warn(dev, "K90 in unknown mode: %02hhx.\n", data[0]); - return -EIO; + ret = -EIO; + goto out; } - return snprintf(buf, PAGE_SIZE, "%s\n", macro_mode); + ret = snprintf(buf, PAGE_SIZE, "%s\n", macro_mode); +out: + kfree(data); + + return ret; } static ssize_t k90_store_macro_mode(struct device *dev, @@ -320,7 +340,11 @@ static ssize_t k90_show_current_profile(struct device *dev, struct usb_interface *usbif = to_usb_interface(dev->parent); struct usb_device *usbdev = interface_to_usbdev(usbif); int current_profile; - char data[8]; + char *data; + + data = kmalloc(8, GFP_KERNEL); + if (!data) + return -ENOMEM; ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), K90_REQUEST_STATUS, @@ -330,16 +354,22 @@ static ssize_t k90_show_current_profile(struct device *dev, if (ret < 0) { dev_warn(dev, "Failed to get K90 initial state (error %d).\n", ret); - return -EIO; + ret = -EIO; + goto out; } current_profile = data[7]; if (current_profile < 1 || current_profile > 3) { dev_warn(dev, "Read invalid current profile: %02hhx.\n", data[7]); - return -EIO; + ret = -EIO; + goto out; } - return snprintf(buf, PAGE_SIZE, "%d\n", current_profile); + ret = snprintf(buf, PAGE_SIZE, "%d\n", current_profile); +out: + kfree(data); + + return ret; } static ssize_t k90_store_current_profile(struct device *dev, -- GitLab From 35028599bc8884f33e36a9353be6f1401940aa29 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Jan 2017 18:17:43 +0100 Subject: [PATCH 0755/1442] HID: corsair: fix control-transfer error handling commit 7a546af50eb78ab99840903083231eb635c8a566 upstream. Make sure to check for short control transfers in order to avoid parsing uninitialised buffer data and leaking it to user space. Note that the backlight and macro-mode buffer constraints are kept as loose as possible in order to avoid any regressions should the current buffer sizes be larger than necessary. Fixes: 6f78193ee9ea ("HID: corsair: Add Corsair Vengeance K90 driver") Signed-off-by: Johan Hovold Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-corsair.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index 5971907a23b1..c0303f61c26a 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -159,7 +159,7 @@ static enum led_brightness k90_backlight_get(struct led_classdev *led_cdev) USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 8, USB_CTRL_SET_TIMEOUT); - if (ret < 0) { + if (ret < 5) { dev_warn(dev, "Failed to get K90 initial state (error %d).\n", ret); ret = -EIO; @@ -274,7 +274,7 @@ static ssize_t k90_show_macro_mode(struct device *dev, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 2, USB_CTRL_SET_TIMEOUT); - if (ret < 0) { + if (ret < 1) { dev_warn(dev, "Failed to get K90 initial mode (error %d).\n", ret); ret = -EIO; @@ -351,7 +351,7 @@ static ssize_t k90_show_current_profile(struct device *dev, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, data, 8, USB_CTRL_SET_TIMEOUT); - if (ret < 0) { + if (ret < 8) { dev_warn(dev, "Failed to get K90 initial state (error %d).\n", ret); ret = -EIO; -- GitLab From c1274eeb2fe3524ed075d3343213923262c845c8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2016 00:19:19 +0100 Subject: [PATCH 0756/1442] mmc: sdhci-acpi: Only powered up enabled acpi child devices commit e1d070c3793a2766122865a7c2142853b48808c5 upstream. Commit e5bbf30733f9 ("mmc: sdhci-acpi: Ensure connected devices are powered when probing") introduced code to powerup any acpi child nodes listed in the dstd. But some dstd-s list all possible devices used on some board variants, while reporting if the device is actually present and enabled in the status field of the device. So we end up calling the acpi _PS0 (power-on) method for devices which are not actually present. This does not always end well, e.g. on my cube iwork8 air tablet, this results in freezing the entire tablet as soon as the r8723bs module is loaded. This commit fixes this by checking the child device's status.present and status.enabled bits and only call acpi_device_fix_up_power() if both are set. Fixes: e5bbf30733f9 ("mmc: sdhci-acpi: Ensure connected devices are powered when probing") BugLink: https://github.com/hadess/rtl8723bs/issues/80 Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 81d4dc034793..fddd0be196f4 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -394,7 +394,8 @@ static int sdhci_acpi_probe(struct platform_device *pdev) /* Power on the SDHCI controller and its children */ acpi_device_fix_up_power(device); list_for_each_entry(child, &device->children, node) - acpi_device_fix_up_power(child); + if (child->status.present && child->status.enabled) + acpi_device_fix_up_power(child); if (acpi_bus_get_status(device) || !device->status.present) return -ENODEV; -- GitLab From ad17175732ca7b368b654c5736ef488a064c52a4 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 5 Jan 2017 19:24:04 +0000 Subject: [PATCH 0757/1442] mmc: mxs-mmc: Fix additional cycles after transmission stop commit 01167c7b9cbf099c69fe411a228e4e9c7104e123 upstream. According to the code the intention is to append 8 SCK cycles instead of 4 at end of a MMC_STOP_TRANSMISSION command. But this will never happened because it's an AC command not an ADTC command. So fix this by moving the statement into the right function. Signed-off-by: Stefan Wahren Fixes: e4243f13d10e (mmc: mxs-mmc: add mmc host driver for i.MX23/28) Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mxs-mmc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 44ecebd1ea8c..c8b8ac66ff7e 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -309,6 +309,9 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host) cmd0 = BF_SSP(cmd->opcode, CMD0_CMD); cmd1 = cmd->arg; + if (cmd->opcode == MMC_STOP_TRANSMISSION) + cmd0 |= BM_SSP_CMD0_APPEND_8CYC; + if (host->sdio_irq_en) { ctrl0 |= BM_SSP_CTRL0_SDIO_IRQ_CHECK; cmd0 |= BM_SSP_CMD0_CONT_CLKING_EN | BM_SSP_CMD0_SLOW_CLKING_EN; @@ -417,8 +420,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host) ssp->base + HW_SSP_BLOCK_SIZE); } - if ((cmd->opcode == MMC_STOP_TRANSMISSION) || - (cmd->opcode == SD_IO_RW_EXTENDED)) + if (cmd->opcode == SD_IO_RW_EXTENDED) cmd0 |= BM_SSP_CMD0_APPEND_8CYC; cmd1 = cmd->arg; -- GitLab From 86249aef233e6e05ba84f6c9aab9e6b890fc6ba7 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Thu, 15 Dec 2016 18:40:14 +0100 Subject: [PATCH 0758/1442] ieee802154: atusb: do not use the stack for buffers to make them DMA able commit 05a974efa4bdf6e2a150e3f27dc6fcf0a9ad5655 upstream. From 4.9 we should really avoid using the stack here as this will not be DMA able on various platforms. This changes the buffers already being present in time of 4.9 being released. This should go into stable as well. Reported-by: Dan Carpenter Signed-off-by: Stefan Schmidt Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/net/ieee802154/atusb.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 1056ed142411..f186e0460cde 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -112,13 +112,26 @@ static int atusb_read_reg(struct atusb *atusb, uint8_t reg) { struct usb_device *usb_dev = atusb->usb_dev; int ret; + uint8_t *buffer; uint8_t value; + buffer = kmalloc(1, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + dev_dbg(&usb_dev->dev, "atusb: reg = 0x%x\n", reg); ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_REG_READ, ATUSB_REQ_FROM_DEV, - 0, reg, &value, 1, 1000); - return ret >= 0 ? value : ret; + 0, reg, buffer, 1, 1000); + + if (ret >= 0) { + value = buffer[0]; + kfree(buffer); + return value; + } else { + kfree(buffer); + return ret; + } } static int atusb_write_subreg(struct atusb *atusb, uint8_t reg, uint8_t mask, @@ -587,9 +600,13 @@ static struct ieee802154_ops atusb_ops = { static int atusb_get_and_show_revision(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; - unsigned char buffer[3]; + unsigned char *buffer; int ret; + buffer = kmalloc(3, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + /* Get a couple of the ATMega Firmware values */ ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_ID, ATUSB_REQ_FROM_DEV, 0, 0, @@ -605,15 +622,20 @@ static int atusb_get_and_show_revision(struct atusb *atusb) dev_info(&usb_dev->dev, "Please update to version 0.2 or newer"); } + kfree(buffer); return ret; } static int atusb_get_and_show_build(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; - char build[ATUSB_BUILD_SIZE + 1]; + char *build; int ret; + build = kmalloc(ATUSB_BUILD_SIZE + 1, GFP_KERNEL); + if (!build) + return -ENOMEM; + ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000); @@ -622,6 +644,7 @@ static int atusb_get_and_show_build(struct atusb *atusb) dev_info(&usb_dev->dev, "Firmware: build %s\n", build); } + kfree(build); return ret; } -- GitLab From d8d82ac42e27a0312193364193139aa1917efe11 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 5 Dec 2016 03:47:10 +0200 Subject: [PATCH 0759/1442] mtd: nand: lpc32xx: fix invalid error handling of a requested irq commit cf9e1672a66c49ed8903c01b4c380a2f2dc91b40 upstream. Semantics of NR_IRQS is different on machines with SPARSE_IRQ option disabled or enabled, in the latter case IRQs are allocated starting at least from the value specified by NR_IRQS and going upwards, so the check of (irq >= NR_IRQ) to decide about an error code returned by platform_get_irq() is completely invalid, don't attempt to overrule irq subsystem in the driver. The change fixes LPC32xx NAND MLC driver initialization on boot. Fixes: 8cb17b5ed017 ("irqchip: Add LPC32xx interrupt controller driver") Signed-off-by: Vladimir Zapolskiy Acked-by: Sylvain Lemieux Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/lpc32xx_mlc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c index 852388171f20..bc6e49af063a 100644 --- a/drivers/mtd/nand/lpc32xx_mlc.c +++ b/drivers/mtd/nand/lpc32xx_mlc.c @@ -776,7 +776,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) init_completion(&host->comp_controller); host->irq = platform_get_irq(pdev, 0); - if ((host->irq < 0) || (host->irq >= NR_IRQS)) { + if (host->irq < 0) { dev_err(&pdev->dev, "failed to get platform irq\n"); res = -EINVAL; goto err_exit3; -- GitLab From c8dce6e0ef2c3641be81639fbfe4f10b6d411c41 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 5 Dec 2016 22:14:36 +0100 Subject: [PATCH 0760/1442] mtd: nand: xway: disable module support commit 73529c872a189c747bdb528ce9b85b67b0e28dec upstream. The xway_nand driver accesses the ltq_ebu_membase symbol which is not exported. This also should not get exported and we should handle the EBU interface in a better way later. This quick fix just deactivated support for building as module. Fixes: 99f2b107924c ("mtd: lantiq: Add NAND support on Lantiq XWAY SoC.") Signed-off-by: Hauke Mehrtens Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 7b7a887b4709..b254090b8a1b 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -537,7 +537,7 @@ config MTD_NAND_FSMC Flexible Static Memory Controller (FSMC) config MTD_NAND_XWAY - tristate "Support for NAND on Lantiq XWAY SoC" + bool "Support for NAND on Lantiq XWAY SoC" depends on LANTIQ && SOC_TYPE_XWAY help Enables support for NAND Flash chips on Lantiq XWAY SoCs. NAND is attached -- GitLab From 001ed27d088a4f66de05d440a2e7c8dd018be457 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 5 Dec 2016 22:14:37 +0100 Subject: [PATCH 0761/1442] mtd: nand: xway: fix build because of module functions commit a2724663494f7313f53da10d8c0a729c5e3c4dea upstream. Remove the usage of modules functions to make this driver compile again. Otherwise an include of linux/modules.h would be needed. Fixes: 024366750c2e ("mtd: nand: xway: convert to normal platform driver") Signed-off-by: Hauke Mehrtens Signed-off-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/xway_nand.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/mtd/nand/xway_nand.c b/drivers/mtd/nand/xway_nand.c index 1f2948c0c458..895101a5e686 100644 --- a/drivers/mtd/nand/xway_nand.c +++ b/drivers/mtd/nand/xway_nand.c @@ -232,7 +232,6 @@ static const struct of_device_id xway_nand_match[] = { { .compatible = "lantiq,nand-xway" }, {}, }; -MODULE_DEVICE_TABLE(of, xway_nand_match); static struct platform_driver xway_nand_driver = { .probe = xway_nand_probe, @@ -243,6 +242,4 @@ static struct platform_driver xway_nand_driver = { }, }; -module_platform_driver(xway_nand_driver); - -MODULE_LICENSE("GPL"); +builtin_platform_driver(xway_nand_driver); -- GitLab From e0641f201114700dceac729babc89991ebb4b3ef Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 12 Jan 2017 16:25:15 +0100 Subject: [PATCH 0762/1442] KVM: s390: do not expose random data via facility bitmap commit 04478197416e3a302e9ebc917ba1aa884ef9bfab upstream. kvm_s390_get_machine() populates the facility bitmap by copying bytes from the host results that are stored in a 256 byte array in the prefix page. The KVM code does use the size of the target buffer (2k), thus copying and exposing unrelated kernel memory (mostly machine check related logout data). Let's use the size of the source buffer instead. This is ok, as the target buffer will always be greater or equal than the source buffer as the KVM internal buffers (and thus S390_ARCH_FAC_LIST_SIZE_BYTE) cover the maximum possible size that is allowed by STFLE, which is 256 doublewords. All structures are zero allocated so we can leave bytes 256-2047 unchanged. Add a similar fix for kvm_arch_init_vm(). Reported-by: Heiko Carstens [found with smatch] Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/kvm-s390.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9c7a1ecfe6bd..47a1de77b18d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -916,7 +916,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, - S390_ARCH_FAC_LIST_SIZE_BYTE); + sizeof(S390_lowcore.stfle_fac_list)); if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) ret = -EFAULT; kfree(mach); @@ -1437,7 +1437,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* Populate the facility mask initially. */ memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, - S390_ARCH_FAC_LIST_SIZE_BYTE); + sizeof(S390_lowcore.stfle_fac_list)); for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; -- GitLab From 26c4d513b6af730941bb4ff4c237789a4d190c27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Jan 2017 09:21:56 +0000 Subject: [PATCH 0763/1442] KVM: arm/arm64: vgic: Fix deadlock on error handling commit 1193e6aeecb36c74c48c7cd0f641acbbed9ddeef upstream. Dmitry Vyukov reported that the syzkaller fuzzer triggered a deadlock in the vgic setup code when an error was detected, as the cleanup code tries to take a lock that is already held by the setup code. The fix is to avoid retaking the lock when cleaning up, by telling the cleanup function that we already hold it. Reported-by: Dmitry Vyukov Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-init.c | 18 +++++++++++++----- virt/kvm/arm/vgic/vgic-v2.c | 2 -- virt/kvm/arm/vgic/vgic-v3.c | 2 -- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 8cebfbc19e90..539d3f5cb619 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -268,15 +268,11 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - mutex_lock(&kvm->lock); - dist->ready = false; dist->initialized = false; kfree(dist->spis); dist->nr_spis = 0; - - mutex_unlock(&kvm->lock); } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -286,7 +282,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) INIT_LIST_HEAD(&vgic_cpu->ap_list_head); } -void kvm_vgic_destroy(struct kvm *kvm) +/* To be called with kvm->lock held */ +static void __kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; int i; @@ -297,6 +294,13 @@ void kvm_vgic_destroy(struct kvm *kvm) kvm_vgic_vcpu_destroy(vcpu); } +void kvm_vgic_destroy(struct kvm *kvm) +{ + mutex_lock(&kvm->lock); + __kvm_vgic_destroy(kvm); + mutex_unlock(&kvm->lock); +} + /** * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest * is a GICv2. A GICv3 must be explicitly initialized by the guest using the @@ -348,6 +352,10 @@ int kvm_vgic_map_resources(struct kvm *kvm) ret = vgic_v2_map_resources(kvm); else ret = vgic_v3_map_resources(kvm); + + if (ret) + __kvm_vgic_destroy(kvm); + out: mutex_unlock(&kvm->lock); return ret; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 9bab86757fa4..834137e7b83f 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -293,8 +293,6 @@ int vgic_v2_map_resources(struct kvm *kvm) dist->ready = true; out: - if (ret) - kvm_vgic_destroy(kvm); return ret; } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 5c9f9745e6ca..e6b03fd8c374 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -302,8 +302,6 @@ int vgic_v3_map_resources(struct kvm *kvm) dist->ready = true; out: - if (ret) - kvm_vgic_destroy(kvm); return ret; } -- GitLab From 09187cdf50c17a5f537b78a3154c4b1014d9a36e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 16 Jan 2017 11:58:53 -0600 Subject: [PATCH 0764/1442] powerpc/icp-opal: Fix missing KVM case and harden replay commit 9728a7c8ab2f7a1c8d5c95278d2e4f4ac1285385 upstream. The icp-opal call is missing the code from icp-native to recover interrupts snatched by KVM. Without that, when running KVM, we can get into a situation where an interrupt is lost and the CPU stuck with an elevated CPPR. Also harden replay by always checking the return from opal_int_eoi(). Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/xics/icp-opal.c | 31 ++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index d38e86fd5720..60c57657c772 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -20,6 +20,7 @@ #include #include #include +#include static void icp_opal_teardown_cpu(void) { @@ -39,7 +40,26 @@ static void icp_opal_flush_ipi(void) * Should we be flagging idle loop instead? * Or creating some task to be scheduled? */ - opal_int_eoi((0x00 << 24) | XICS_IPI); + if (opal_int_eoi((0x00 << 24) | XICS_IPI) > 0) + force_external_irq_replay(); +} + +static unsigned int icp_opal_get_xirr(void) +{ + unsigned int kvm_xirr; + __be32 hw_xirr; + int64_t rc; + + /* Handle an interrupt latched by KVM first */ + kvm_xirr = kvmppc_get_xics_latch(); + if (kvm_xirr) + return kvm_xirr; + + /* Then ask OPAL */ + rc = opal_int_get_xirr(&hw_xirr, false); + if (rc < 0) + return 0; + return be32_to_cpu(hw_xirr); } static unsigned int icp_opal_get_irq(void) @@ -47,12 +67,8 @@ static unsigned int icp_opal_get_irq(void) unsigned int xirr; unsigned int vec; unsigned int irq; - int64_t rc; - rc = opal_int_get_xirr(&xirr, false); - if (rc < 0) - return 0; - xirr = be32_to_cpu(xirr); + xirr = icp_opal_get_xirr(); vec = xirr & 0x00ffffff; if (vec == XICS_IRQ_SPURIOUS) return 0; @@ -67,7 +83,8 @@ static unsigned int icp_opal_get_irq(void) xics_mask_unknown_vec(vec); /* We might learn about it later, so EOI it */ - opal_int_eoi(xirr); + if (opal_int_eoi(xirr) > 0) + force_external_irq_replay(); return 0; } -- GitLab From 98e60a8345ae93fc5e5a9c9f30879a1da53aabad Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 9 Jan 2017 19:00:15 +0530 Subject: [PATCH 0765/1442] powerpc/perf: Fix PM_BRU_CMPL event code for power9 commit d89f473ff6f84872e761419f7233d6e00f99c340 upstream. Use 0x10012 event code for PM_BRU_CMPL event in power9 event list instead of current 0x40060. Fixes: 34922527a2bcb ('powerpc/perf: Add power9 event list macros for generic and cache events') Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/power9-events-list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index 6447dc1c3d89..929b56d47ad9 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -16,7 +16,7 @@ EVENT(PM_CYC, 0x0001e) EVENT(PM_ICT_NOSLOT_CYC, 0x100f8) EVENT(PM_CMPLU_STALL, 0x1e054) EVENT(PM_INST_CMPL, 0x00002) -EVENT(PM_BRU_CMPL, 0x40060) +EVENT(PM_BRU_CMPL, 0x10012) EVENT(PM_BR_MPRED_CMPL, 0x400f6) /* All L1 D cache load references counted at finish, gated by reject */ -- GitLab From 9e10a0fc73122e90544875ec8375f1e9a187bb25 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 5 Jan 2017 16:50:57 +0000 Subject: [PATCH 0766/1442] powerpc/ptrace: Preserve previous fprs/vsrs on short regset write commit 99dfe80a2a246c600440a815741fd2e74a8b4977 upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Fixes: c6e6771b87d4 ("powerpc: Introduce VSX thread_struct and CONFIG_VSX") Signed-off-by: Dave Martin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/ptrace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index b1ec62f2cc31..18a643e27641 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -463,6 +463,10 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -672,6 +676,9 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, flush_altivec_to_thread(target); flush_vsx_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); if (!ret) -- GitLab From e097785467134635a99a223168ff3825c6fb9d01 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 5 Jan 2017 16:50:57 +0000 Subject: [PATCH 0767/1442] powerpc/ptrace: Preserve previous TM fprs/vsrs on short regset write commit b34ca60148c53971d373643195cc5c4d5d20be78 upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the check pointed registers, the thread's old check pointed registers are preserved. Fixes: 9d3918f7c0e5 ("powerpc/ptrace: Enable support for NT_PPC_CVSX") Fixes: 19cbcbf75a0c ("powerpc/ptrace: Enable support for NT_PPC_CFPR") Signed-off-by: Dave Martin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/ptrace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 18a643e27641..5c8f12fe9721 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1026,6 +1026,10 @@ static int tm_cfpr_set(struct task_struct *target, flush_fp_to_thread(target); flush_altivec_to_thread(target); + for (i = 0; i < 32; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -1290,6 +1294,9 @@ static int tm_cvsx_set(struct task_struct *target, flush_altivec_to_thread(target); flush_vsx_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); if (!ret) -- GitLab From bf15e45eee7ab5df30cc9ec0f7030fbccf1d55b0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 19 Jan 2017 14:19:10 +1100 Subject: [PATCH 0768/1442] powerpc: Ignore reserved field in DCSR and PVR reads and writes commit 178f358208ceb8b38e5cff3f815e0db4a6a70a07 upstream. IBM bit 31 (for the rest of us - bit 0) is a reserved field in the instruction definition of mtspr and mfspr. Hardware is encouraged to (and does) ignore it. As a result, if userspace executes an mtspr DSCR with the reserved bit set, we get a DSCR facility unavailable exception. The kernel fails to match against the expected value/mask, and we silently return to userspace to try and re-execute the same mtspr DSCR instruction. We loop forever until the process is killed. We should do something here, and it seems mirroring what hardware does is the better option vs killing the process. While here, relax the matching of mfspr PVR too. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/ppc-opcode.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c56ea8c84abb..c4ced1d01d57 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -157,7 +157,7 @@ #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 -#define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff +#define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MFTMR 0x7c0002dc #define PPC_INST_MSGSND 0x7c00019c #define PPC_INST_MSGCLR 0x7c0001dc @@ -174,13 +174,13 @@ #define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_MFSPR_DSCR 0x7c1102a6 -#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff +#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR 0x7c1103a6 -#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff +#define PPC_INST_MTSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MFSPR_DSCR_USER 0x7c0302a6 -#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 -#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MFVSRD 0x7c000066 #define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SLBFEE 0x7c0007a7 -- GitLab From 33038189bcabe9fecfef79adf985539b954e0c7c Mon Sep 17 00:00:00 2001 From: Ruslan Ruslichenko Date: Tue, 17 Jan 2017 16:13:52 +0200 Subject: [PATCH 0769/1442] x86/ioapic: Restore IO-APIC irq_chip retrigger callback commit 020eb3daaba2857b32c4cf4c82f503d6a00a67de upstream. commit d32932d02e18 removed the irq_retrigger callback from the IO-APIC chip and did not add it to the new IO-APIC-IR irq chip. Unfortunately the software resend fallback is not enabled on X86, so edge interrupts which are received during the lazy disabled state of the interrupt line are not retriggered and therefor lost. Restore the callbacks. [ tglx: Massaged changelog ] Fixes: d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces") Signed-off-by: Ruslan Ruslichenko Cc: xe-linux-external@cisco.com Link: http://lkml.kernel.org/r/1484662432-13580-1-git-send-email-rruslich@cisco.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 48e6d84f173e..3d8ff40ecc6f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1876,6 +1876,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1887,6 +1888,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; -- GitLab From beecb1e72cc5af0921c9daf75c404408744822cd Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 23 Dec 2016 18:06:10 -0800 Subject: [PATCH 0770/1442] qla2xxx: Fix crash due to null pointer access commit fc1ffd6cb38a1c1af625b9833c41928039e733f5 upstream. During code inspection, while investigating following stack trace seen on one of the test setup, we found out there was possibility of memory leak becuase driver was not unwinding the stack properly. This issue has not been reproduced in a test environment or on a customer setup. Here's stack trace that was seen. [1469877.797315] Call Trace: [1469877.799940] [] qla2x00_mem_alloc+0xb09/0x10c0 [qla2xxx] [1469877.806980] [] qla2x00_probe_one+0x86a/0x1b50 [qla2xxx] [1469877.814013] [] ? __pm_runtime_resume+0x51/0xa0 [1469877.820265] [] ? _raw_spin_lock_irqsave+0x25/0x90 [1469877.826776] [] ? _raw_spin_unlock_irqrestore+0x6d/0x80 [1469877.833720] [] ? preempt_count_sub+0xb1/0x100 [1469877.839885] [] ? _raw_spin_unlock_irqrestore+0x4c/0x80 [1469877.846830] [] local_pci_probe+0x4c/0xb0 [1469877.852562] [] ? preempt_count_sub+0xb1/0x100 [1469877.858727] [] pci_call_probe+0x89/0xb0 Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig [ bvanassche: Fixed spelling in patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 56d6142852a5..078d797cb492 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3489,7 +3489,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, sizeof(struct ct6_dsd), 0, SLAB_HWCACHE_ALIGN, NULL); if (!ctx_cachep) - goto fail_free_gid_list; + goto fail_free_srb_mempool; } ha->ctx_mempool = mempool_create_slab_pool(SRB_MIN_REQ, ctx_cachep); @@ -3642,7 +3642,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, ha->loop_id_map = kzalloc(BITS_TO_LONGS(LOOPID_MAP_SIZE) * sizeof(long), GFP_KERNEL); if (!ha->loop_id_map) - goto fail_async_pd; + goto fail_loop_id_map; else { qla2x00_set_reserved_loop_ids(ha); ql_dbg_pci(ql_dbg_init, ha->pdev, 0x0123, @@ -3651,6 +3651,8 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, return 0; +fail_loop_id_map: + dma_pool_free(ha->s_dma_pool, ha->async_pd, ha->async_pd_dma); fail_async_pd: dma_pool_free(ha->s_dma_pool, ha->ex_init_cb, ha->ex_init_cb_dma); fail_ex_init_cb: @@ -3678,6 +3680,10 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, dma_pool_free(ha->s_dma_pool, ha->ms_iocb, ha->ms_iocb_dma); ha->ms_iocb = NULL; ha->ms_iocb_dma = 0; + + if (ha->sns_cmd) + dma_free_coherent(&ha->pdev->dev, sizeof(struct sns_cmd_pkt), + ha->sns_cmd, ha->sns_cmd_dma); fail_dma_pool: if (IS_QLA82XX(ha) || ql2xenabledif) { dma_pool_destroy(ha->fcp_cmnd_dma_pool); @@ -3695,10 +3701,12 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, kfree(ha->nvram); ha->nvram = NULL; fail_free_ctx_mempool: - mempool_destroy(ha->ctx_mempool); + if (ha->ctx_mempool) + mempool_destroy(ha->ctx_mempool); ha->ctx_mempool = NULL; fail_free_srb_mempool: - mempool_destroy(ha->srb_mempool); + if (ha->srb_mempool) + mempool_destroy(ha->srb_mempool); ha->srb_mempool = NULL; fail_free_gid_list: dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha), -- GitLab From bab10a549fd64e36f06c995d6adf22c0413e9379 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Dec 2016 16:47:43 +0100 Subject: [PATCH 0771/1442] mac80211: implement multicast forwarding on fast-RX path commit eeb0d56fab4cd7848cf2be6704fa48900dbc1381 upstream. In AP (or VLAN) mode, when unicast 802.11 packets are received, they might actually be multicast after conversion. In this case the fast-RX path didn't handle them properly to send them back to the wireless medium. Implement that by copying the SKB and sending it back out. The possible alternative would be to just punt the packet back to the regular (slow) RX path, but since we have almost all of the required code here already it's not so complicated to add here. Punting it back would also mean acquiring the spinlock, which would be bad for the stated purpose of the fast-RX path, to enable well-performing parallel RX. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a47bbc973f2d..2384b4aae064 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3939,21 +3939,31 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u64_stats_update_end(&stats->syncp); if (fast_rx->internal_forward) { - struct sta_info *dsta = sta_info_get(rx->sdata, skb->data); + struct sk_buff *xmit_skb = NULL; + bool multicast = is_multicast_ether_addr(skb->data); - if (dsta) { + if (multicast) { + xmit_skb = skb_copy(skb, GFP_ATOMIC); + } else if (sta_info_get(rx->sdata, skb->data)) { + xmit_skb = skb; + skb = NULL; + } + + if (xmit_skb) { /* * Send to wireless media and increase priority by 256 * to keep the received priority instead of * reclassifying the frame (see cfg80211_classify8021d). */ - skb->priority += 256; - skb->protocol = htons(ETH_P_802_3); - skb_reset_network_header(skb); - skb_reset_mac_header(skb); - dev_queue_xmit(skb); - return true; + xmit_skb->priority += 256; + xmit_skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(xmit_skb); + skb_reset_mac_header(xmit_skb); + dev_queue_xmit(xmit_skb); } + + if (!skb) + return true; } /* deliver to local stack */ -- GitLab From ce5c52f039cf886ac1f2a2535bb4e02ebcefe043 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 10 Jan 2017 11:49:40 +0100 Subject: [PATCH 0772/1442] ubifs: Fix journal replay wrt. xattr nodes commit 1cb51a15b576ee325d527726afff40947218fd5e upstream. When replaying the journal it can happen that a journal entry points to a garbage collected node. This is the case when a power-cut occurred between a garbage collect run and a commit. In such a case nodes have to be read using the failable read functions to detect whether the found node matches what we expect. One corner case was forgotten, when the journal contains an entry to remove an inode all xattrs have to be removed too. UBIFS models xattr like directory entries, so the TNC code iterates over all xattrs of the inode and removes them too. This code re-uses the functions for walking directories and calls ubifs_tnc_next_ent(). ubifs_tnc_next_ent() expects to be used only after the journal and aborts when a node does not match the expected result. This behavior can render an UBIFS volume unmountable after a power-cut when xattrs are used. Fix this issue by using failable read functions in ubifs_tnc_next_ent() too when replaying the journal. Fixes: 1e51764a3c2ac05a ("UBIFS: add new flash file system") Reported-by: Rock Lee Reviewed-by: David Gstir Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/tnc.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index fa9a20cc60d6..fe5e8d4970ae 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -34,6 +34,11 @@ #include #include "ubifs.h" +static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs); +static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_zbranch *zbr, void *node); + /* * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. * @NAME_LESS: name corresponding to the first argument is less than second @@ -402,7 +407,19 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, return 0; } - err = ubifs_tnc_read_node(c, zbr, node); + if (c->replaying) { + err = fallible_read_node(c, &zbr->key, zbr, node); + /* + * When the node was not found, return -ENOENT, 0 otherwise. + * Negative return codes stay as-is. + */ + if (err == 0) + err = -ENOENT; + else if (err == 1) + err = 0; + } else { + err = ubifs_tnc_read_node(c, zbr, node); + } if (err) return err; @@ -2766,7 +2783,11 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, if (nm->name) { if (err) { /* Handle collisions */ - err = resolve_collision(c, key, &znode, &n, nm); + if (c->replaying) + err = fallible_resolve_collision(c, key, &znode, &n, + nm, 0); + else + err = resolve_collision(c, key, &znode, &n, nm); dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); if (unlikely(err < 0)) -- GitLab From c49b31b585a17fd59227a23bd98b7c834947e851 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Tue, 17 Jan 2017 13:54:36 +0900 Subject: [PATCH 0773/1442] clocksource/exynos_mct: Clear interrupt when cpu is shut down commit bc7c36eedb0c7004aa06c2afc3c5385adada8fa3 upstream. When a CPU goes offline a potentially pending timer interrupt is not cleared. When the CPU comes online again then the pending interrupt is delivered before the per cpu clockevent device is initialized. As a consequence the tick interrupt handler dereferences a NULL pointer. [ 51.251378] Unable to handle kernel NULL pointer dereference at virtual address 00000040 [ 51.289348] task: ee942d00 task.stack: ee960000 [ 51.293861] PC is at tick_periodic+0x38/0xb0 [ 51.298102] LR is at tick_handle_periodic+0x1c/0x90 Clear the pending interrupt in the cpu dying path. Fixes: 56a94f13919c ("clocksource: exynos_mct: Avoid blocking calls in the cpu hotplug notifier") Reported-by: Seung-Woo Kim Signed-off-by: Joonyoung Shim Cc: linux-samsung-soc@vger.kernel.org Cc: cw00.choi@samsung.com Cc: daniel.lezcano@linaro.org Cc: javier@osg.samsung.com Cc: kgene@kernel.org Cc: krzk@kernel.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1484628876-22065-1-git-send-email-jy0922.shim@samsung.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/exynos_mct.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 8f3488b80896..7f6fed9f0703 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -495,6 +495,7 @@ static int exynos4_mct_dying_cpu(unsigned int cpu) if (mct_int_type == MCT_INT_SPI) { if (evt->irq != -1) disable_irq_nosync(evt->irq); + exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET); } else { disable_percpu_irq(mct_irqs[MCT_L0_IRQ]); } -- GitLab From 73a2e2405d300931f9e30ae439dcda21f592d110 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Mon, 9 Jan 2017 16:00:44 +0530 Subject: [PATCH 0774/1442] svcrdma: avoid duplicate dma unmapping during error recovery commit ce1ca7d2d140a1f4aaffd297ac487f246963dd2f upstream. In rdma_read_chunk_frmr() when ib_post_send() fails, the error code path invokes ib_dma_unmap_sg() to unmap the sg list. It then invokes svc_rdma_put_frmr() which in turn tries to unmap the same sg list through ib_dma_unmap_sg() again. This second unmap is invalid and could lead to problems when the iova being unmapped is subsequently reused. Remove the call to unmap in rdma_read_chunk_frmr() and let svc_rdma_put_frmr() handle it. Fixes: 412a15c0fe53 ("svcrdma: Port to new memory registration API") Signed-off-by: Sriharsha Basavapatna Reviewed-by: Chuck Lever Reviewed-by: Yuval Shaia Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ad1df979b3f0..a47c9bdef5fa 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -348,8 +348,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - ib_dma_unmap_sg(xprt->sc_cm_id->device, - frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; -- GitLab From df35a8f51fcbad255df9ac541eef2e84182a4c50 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 6 Jan 2017 13:12:47 +0100 Subject: [PATCH 0775/1442] ARM: 8634/1: hw_breakpoint: blacklist Scorpion CPUs commit ddc37832a1349f474c4532de381498020ed71d31 upstream. On APQ8060, the kernel crashes in arch_hw_breakpoint_init, taking an undefined instruction trap within write_wb_reg. This is because Scorpion CPUs erroneously appear to set DBGPRSR.SPD when WFI is issued, even if the core is not powered down. When DBGPRSR.SPD is set, breakpoint and watchpoint registers are treated as undefined. It's possible to trigger similar crashes later on from userspace, by requesting the kernel to install a breakpoint or watchpoint, as we can go idle at any point between the reset of the debug registers and their later use. This has always been the case. Given that this has always been broken, no-one has complained until now, and there is no clear workaround, disable hardware breakpoints and watchpoints on Scorpion to avoid these issues. Signed-off-by: Mark Rutland Reported-by: Linus Walleij Reviewed-by: Stephen Boyd Acked-by: Will Deacon Cc: Russell King Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/cputype.h | 3 +++ arch/arm/kernel/hw_breakpoint.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 522b5feb4eaa..b62eaeb147aa 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -94,6 +94,9 @@ #define ARM_CPU_XSCALE_ARCH_V2 0x4000 #define ARM_CPU_XSCALE_ARCH_V3 0x6000 +/* Qualcomm implemented cores */ +#define ARM_CPU_PART_SCORPION 0x510002d0 + extern unsigned int processor_id; #ifdef CONFIG_CPU_CP15 diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b8df45883cf7..25538a935874 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -1066,6 +1066,22 @@ static int __init arch_hw_breakpoint_init(void) return 0; } + /* + * Scorpion CPUs (at least those in APQ8060) seem to set DBGPRSR.SPD + * whenever a WFI is issued, even if the core is not powered down, in + * violation of the architecture. When DBGPRSR.SPD is set, accesses to + * breakpoint and watchpoint registers are treated as undefined, so + * this results in boot time and runtime failures when these are + * accessed and we unexpectedly take a trap. + * + * It's not clear if/how this can be worked around, so we blacklist + * Scorpion CPUs to avoid these issues. + */ + if (read_cpuid_part() == ARM_CPU_PART_SCORPION) { + pr_info("Scorpion CPU detected. Hardware breakpoints and watchpoints disabled\n"); + return 0; + } + has_ossr = core_has_os_save_restore(); /* Determine how many BRPs/WRPs are available. */ -- GitLab From 51cff2c64d20ba65af768b4d922b98cb7baf947e Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Wed, 11 Jan 2017 13:16:42 -0600 Subject: [PATCH 0776/1442] ibmvscsis: Fix sleeping in interrupt context commit a5b0e4062fb225155189e593699bbfcd0597f8b5 upstream. Currently, dma_alloc_coherent is being called with a GFP_KERNEL flag which allows it to sleep in an interrupt context, need to change to GFP_ATOMIC. Tested-by: Steven Royer Reviewed-by: Michael Cyr Signed-off-by: Bryant G. Ly Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 608140f16d98..c5dd3e0b67eb 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1239,7 +1239,7 @@ static long ibmvscsis_adapter_info(struct scsi_info *vscsi, } info = dma_alloc_coherent(&vscsi->dma_dev->dev, sizeof(*info), &token, - GFP_KERNEL); + GFP_ATOMIC); if (!info) { dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n", iue->target); @@ -1357,7 +1357,7 @@ static int ibmvscsis_cap_mad(struct scsi_info *vscsi, struct iu_entry *iue) } cap = dma_alloc_coherent(&vscsi->dma_dev->dev, olen, &token, - GFP_KERNEL); + GFP_ATOMIC); if (!cap) { dev_err(&vscsi->dev, "bad dma_alloc_coherent %p\n", iue->target); -- GitLab From a14aeccb65e5dacdd8b47eafc2778b1bfce8978b Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Wed, 11 Jan 2017 13:52:27 -0600 Subject: [PATCH 0777/1442] ibmvscsis: Fix max transfer length commit 387b978cb0d12cf3720ecb17e652e0a9991a08e2 upstream. Current code incorrectly calculates the max transfer length, since it is assuming a 4k page table, but ppc64 all run on 64k page tables. Reported-by: Steven Royer Tested-by: Steven Royer Signed-off-by: Bryant G. Ly Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index c5dd3e0b67eb..e3b911c895b4 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -45,6 +45,7 @@ #define INITIAL_SRP_LIMIT 800 #define DEFAULT_MAX_SECTORS 256 +#define MAX_TXU 1024 * 1024 static uint max_vdma_size = MAX_H_COPY_RDMA; @@ -1291,7 +1292,7 @@ static long ibmvscsis_adapter_info(struct scsi_info *vscsi, info->mad_version = cpu_to_be32(MAD_VERSION_1); info->os_type = cpu_to_be32(LINUX); memset(&info->port_max_txu[0], 0, sizeof(info->port_max_txu)); - info->port_max_txu[0] = cpu_to_be32(128 * PAGE_SIZE); + info->port_max_txu[0] = cpu_to_be32(MAX_TXU); dma_wmb(); rc = h_copy_rdma(sizeof(*info), vscsi->dds.window[LOCAL].liobn, -- GitLab From 1f75575aca7be589881cda14344b978b42cbceb5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 12 Jan 2017 14:42:41 -0500 Subject: [PATCH 0778/1442] ceph: fix bad endianness handling in parse_reply_info_extra commit 6df8c9d80a27cb587f61b4f06b57e248d8bc3f86 upstream. sparse says: fs/ceph/mds_client.c:291:23: warning: restricted __le32 degrades to integer fs/ceph/mds_client.c:293:28: warning: restricted __le32 degrades to integer fs/ceph/mds_client.c:294:28: warning: restricted __le32 degrades to integer fs/ceph/mds_client.c:296:28: warning: restricted __le32 degrades to integer The op value is __le32, so we need to convert it before comparing it. Signed-off-by: Jeff Layton Reviewed-by: Sage Weil Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 815acd1a56d4..6a26c7bd1286 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -288,12 +288,13 @@ static int parse_reply_info_extra(void **p, void *end, struct ceph_mds_reply_info_parsed *info, u64 features) { - if (info->head->op == CEPH_MDS_OP_GETFILELOCK) + u32 op = le32_to_cpu(info->head->op); + + if (op == CEPH_MDS_OP_GETFILELOCK) return parse_reply_info_filelock(p, end, info, features); - else if (info->head->op == CEPH_MDS_OP_READDIR || - info->head->op == CEPH_MDS_OP_LSSNAP) + else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP) return parse_reply_info_dir(p, end, info, features); - else if (info->head->op == CEPH_MDS_OP_CREATE) + else if (op == CEPH_MDS_OP_CREATE) return parse_reply_info_create(p, end, info, features); else return -EIO; -- GitLab From 5b66186152158076de6a1405329b255a327dd538 Mon Sep 17 00:00:00 2001 From: Jean-Jacques Hiblot Date: Mon, 9 Jan 2017 13:22:15 +0100 Subject: [PATCH 0779/1442] ARM: dts: OMAP5 / DRA7: indicate that SATA port 0 is available. commit 87cb12910a2ab6ed41ae951ea4d9c1cc1120199a upstream. AHCI provides the register PORTS_IMPL to let the software know which port is supported. The register must be initialized by the bootloader. However in some cases u-boot doesn't properly initialize this value (if it is not compiled with SATA support for example or if the SATA initialization fails). The DTS entry "ports-implemented" can be used to override the value in PORTS_IMPL. Without this patch the SATA will not work in the following two cases: * if there has been a failure to initialize SATA in u-boot. * if ahci_platform module has been removed and re-inserted. The reason is that the content of PORTS_IMPL is lost after the module is removed. I suspect that it's because the controller is reset by the hwmod. Signed-off-by: Jean-Jacques Hiblot Acked-by: Roger Quadros [tony@atomide.com: updated comments with what goes wrong] Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 1 + arch/arm/boot/dts/omap5.dtsi | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index d4fcd68f6349..ef888295aa7b 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1376,6 +1376,7 @@ phy-names = "sata-phy"; clocks = <&sata_ref_clk>; ti,hwmods = "sata"; + ports-implemented = <0x1>; }; rtc: rtc@48838000 { diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 25262118ec3d..f8aa0cbc5f01 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -985,6 +985,7 @@ phy-names = "sata-phy"; clocks = <&sata_ref_clk>; ti,hwmods = "sata"; + ports-implemented = <0x1>; }; dss: dss@58000000 { -- GitLab From 71c49649551460f484c97f79c4fd51728ae55495 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Tue, 17 Jan 2017 13:57:42 +0100 Subject: [PATCH 0780/1442] ARM: dts: da850-evm: fix read access to SPI flash commit 43849785e1079f6606a31cb7fda92d1200849728 upstream. Read access to the SPI flash are broken on da850-evm, i.e. the data read is not what is actually programmed on the flash. According to the datasheet for the M25P64 part present on the da850-evm, if the SPI frequency is higher than 20MHz then the READ command is not usable anymore and only the FAST_READ command can be used to read data. This commit specifies in the DTS that we should use FAST_READ command instead of the READ command. Tested-by: Kevin Hilman Signed-off-by: Fabien Parent [nsekhar@ti.com: subject line adjustment] Signed-off-by: Sekhar Nori Signed-off-by: Greg Kroah-Hartman Signed-off-by: Olof Johansson --- arch/arm/boot/dts/da850-evm.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index 41de15fe15a2..78492a0bbbab 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -99,6 +99,7 @@ #size-cells = <1>; compatible = "m25p64"; spi-max-frequency = <30000000>; + m25p,fast-read; reg = <0>; partition@0 { label = "U-Boot-SPL"; -- GitLab From de327948c009792ea49b07139c21e3a953137344 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 18 Jan 2017 17:23:41 +0000 Subject: [PATCH 0781/1442] arm64: avoid returning from bad_mode commit 7d9e8f71b989230bc613d121ca38507d34ada849 upstream. Generally, taking an unexpected exception should be a fatal event, and bad_mode is intended to cater for this. However, it should be possible to contain unexpected synchronous exceptions from EL0 without bringing the kernel down, by sending a SIGILL to the task. We tried to apply this approach in commit 9955ac47f4ba1c95 ("arm64: don't kill the kernel on a bad esr from el0"), by sending a signal for any bad_mode call resulting from an EL0 exception. However, this also applies to other unexpected exceptions, such as SError and FIQ. The entry paths for these exceptions branch to bad_mode without configuring the link register, and have no kernel_exit. Thus, if we take one of these exceptions from EL0, bad_mode will eventually return to the original user link register value. This patch fixes this by introducing a new bad_el0_sync handler to cater for the recoverable case, and restoring bad_mode to its original state, whereby it calls panic() and never returns. The recoverable case branches to bad_el0_sync with a bl, and returns to userspace via the usual ret_to_user mechanism. Signed-off-by: Mark Rutland Fixes: 9955ac47f4ba1c95 ("arm64: don't kill the kernel on a bad esr from el0") Reported-by: Mark Salter Cc: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/traps.c | 28 ++++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 223d54a4d66b..79b0fe24d5b7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -624,7 +624,7 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mov x2, x25 - bl bad_mode + bl bad_el0_sync b ret_to_user ENDPROC(el0_sync) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c9986b3e0a96..11e5eae088ab 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -596,17 +596,34 @@ const char *esr_get_class_string(u32 esr) } /* - * bad_mode handles the impossible case in the exception vector. + * bad_mode handles the impossible case in the exception vector. This is always + * fatal. */ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { - siginfo_t info; - void __user *pc = (void __user *)instruction_pointer(regs); console_verbose(); pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); + + die("Oops - bad mode", regs, 0); + local_irq_disable(); + panic("bad mode"); +} + +/* + * bad_el0_sync handles unexpected, but potentially recoverable synchronous + * exceptions taken from EL0. Unlike bad_mode, this returns. + */ +asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) +{ + siginfo_t info; + void __user *pc = (void __user *)instruction_pointer(regs); + console_verbose(); + + pr_crit("Bad EL0 synchronous exception detected on CPU%d, code 0x%08x -- %s\n", + smp_processor_id(), esr, esr_get_class_string(esr)); __show_regs(regs); info.si_signo = SIGILL; @@ -614,7 +631,10 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) info.si_code = ILL_ILLOPC; info.si_addr = pc; - arm64_notify_die("Oops - bad mode", regs, &info, 0); + current->thread.fault_address = 0; + current->thread.fault_code = 0; + + force_sig_info(info.si_signo, &info, current); } void __pte_error(const char *file, int line, unsigned long val) -- GitLab From 357cfd6c83ee601f88f741b026c3301d6ca564e0 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 16:25:20 +0000 Subject: [PATCH 0782/1442] arm64/ptrace: Preserve previous registers for short regset write commit 9a17b876b573441bfb3387ad55d98bf7184daf9d upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Fixes: 478fcb2cdb23 ("arm64: Debugging support") Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e0c81da60f76..fda01cc6ffcc 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -595,7 +595,7 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - struct user_pt_regs newregs; + struct user_pt_regs newregs = task_pt_regs(target)->user_regs; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1); if (ret) @@ -625,7 +625,8 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - struct user_fpsimd_state newstate; + struct user_fpsimd_state newstate = + target->thread.fpsimd_state.user_fpsimd; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) @@ -649,7 +650,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - unsigned long tls; + unsigned long tls = target->thread.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) -- GitLab From a4aafb8c4204c3a672b3340ae66e731148a30cd1 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 16:25:21 +0000 Subject: [PATCH 0783/1442] arm64/ptrace: Preserve previous registers for short regset write - 2 commit 9dd73f72f218320c6c90da5f834996e7360dc227 upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Fixes: 766a85d7bc5d ("arm64: ptrace: add NT_ARM_SYSTEM_CALL regset") Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index fda01cc6ffcc..59f562bba319 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -676,7 +676,8 @@ static int system_call_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - int syscallno, ret; + int syscallno = task_pt_regs(target)->syscallno; + int ret; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1); if (ret) -- GitLab From 5c5839be08423b8a76d40beb59165e7993b4d455 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 16:25:22 +0000 Subject: [PATCH 0784/1442] arm64/ptrace: Preserve previous registers for short regset write - 3 commit a672401c00f82e4e19704aff361d9bad18003714 upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Fixes: 5d220ff9420f ("arm64: Better native ptrace support for compat tasks") Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 59f562bba319..143f770247c2 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -949,7 +949,7 @@ static int compat_tls_set(struct task_struct *target, const void __user *ubuf) { int ret; - compat_ulong_t tls; + compat_ulong_t tls = target->thread.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) -- GitLab From f9081dd0c8be396428e04cd1e9ca5e4c86e375d4 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 16:25:23 +0000 Subject: [PATCH 0785/1442] arm64/ptrace: Avoid uninitialised struct padding in fpr_set() commit aeb1f39d814b2e21e5e5706a48834bfd553d0059 upstream. This patch adds an explicit __reserved[] field to user_fpsimd_state to replace what was previously unnamed padding. This ensures that data in this region are propagated across assignment rather than being left possibly uninitialised at the destination. Fixes: 60ffc30d5652 ("arm64: Exception handling") Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/uapi/asm/ptrace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index b5c3933ed441..d1ff83dfe5de 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -77,6 +77,7 @@ struct user_fpsimd_state { __uint128_t vregs[32]; __u32 fpsr; __u32 fpcr; + __u32 __reserved[2]; }; struct user_hwdebug_state { -- GitLab From 6e53a62a0d529fb5cf8df8304ef02b691fb2736c Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 18 Jan 2017 16:25:24 +0000 Subject: [PATCH 0786/1442] arm64/ptrace: Reject attempts to set incomplete hardware breakpoint fields commit ad9e202aa1ce571b1d7fed969d06f66067f8a086 upstream. We cannot preserve partial fields for hardware breakpoints, because the values written by userspace to the hardware breakpoint registers can't subsequently be recovered intact from the hardware. So, just reject attempts to write incomplete fields with -EINVAL. Fixes: 478fcb2cdb23 ("arm64: Debugging support") Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 143f770247c2..8eedeef375d6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -550,6 +550,8 @@ static int hw_break_set(struct task_struct *target, /* (address, ctrl) registers */ limit = regset->n * regset->size; while (count && offset < limit) { + if (count < PTRACE_HBP_ADDR_SZ) + return -EINVAL; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr, offset, offset + PTRACE_HBP_ADDR_SZ); if (ret) @@ -559,6 +561,8 @@ static int hw_break_set(struct task_struct *target, return ret; offset += PTRACE_HBP_ADDR_SZ; + if (!count) + break; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, offset, offset + PTRACE_HBP_CTRL_SZ); if (ret) -- GitLab From 6d9b544d88a4a697211062fc2ab2eb0e28c01b13 Mon Sep 17 00:00:00 2001 From: Paul Donohue Date: Mon, 28 Nov 2016 20:11:25 -0800 Subject: [PATCH 0787/1442] Input: ALPS - fix TrackStick support for SS5 hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 864db9295b06837d11a260e5dacf99a3fdf6bce2 upstream. The current Alps SS5 (SS4 v2) code generates bogus TouchPad events when TrackStick packets are processed. This causes the xorg synaptics driver to print "unable to find touch point 0" and "BUG: triggered 'if (priv->num_active_touches > priv->num_slots)'" messages. It also causes unexpected TouchPad button release and re-click event sequences if the TrackStick is moved while holding a TouchPad button. This commit corrects the problem by adjusting alps_process_packet_ss4_v2() so that it only sends TrackStick reports when processing TrackStick packets. Reviewed-by: Pali Rohár Signed-off-by: Paul Donohue Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/alps.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 6d7de9bfed9a..b93fe83a0b63 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1346,6 +1346,18 @@ static void alps_process_packet_ss4_v2(struct psmouse *psmouse) priv->multi_packet = 0; + /* Report trackstick */ + if (alps_get_pkt_id_ss4_v2(packet) == SS4_PACKET_ID_STICK) { + if (priv->flags & ALPS_DUALPOINT) { + input_report_key(dev2, BTN_LEFT, f->ts_left); + input_report_key(dev2, BTN_RIGHT, f->ts_right); + input_report_key(dev2, BTN_MIDDLE, f->ts_middle); + input_sync(dev2); + } + return; + } + + /* Report touchpad */ alps_report_mt_data(psmouse, (f->fingers <= 4) ? f->fingers : 4); input_mt_report_finger_count(dev, f->fingers); @@ -1356,13 +1368,6 @@ static void alps_process_packet_ss4_v2(struct psmouse *psmouse) input_report_abs(dev, ABS_PRESSURE, f->pressure); input_sync(dev); - - if (priv->flags & ALPS_DUALPOINT) { - input_report_key(dev2, BTN_LEFT, f->ts_left); - input_report_key(dev2, BTN_RIGHT, f->ts_right); - input_report_key(dev2, BTN_MIDDLE, f->ts_middle); - input_sync(dev2); - } } static bool alps_is_valid_package_ss4_v2(struct psmouse *psmouse) -- GitLab From 2e62bf3c6fe94b9c7fd37beac4b8b8fcd3f72d71 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:06 +0100 Subject: [PATCH 0788/1442] libceph: ceph_x_encrypt_buflen() takes in_len commit 36721ece1e84a25130c4befb930509b3f96de020 upstream. Pass what's going to be encrypted - that's msg_b, not ticket_blob. ceph_x_encrypt_buflen() returns the upper bound, so this doesn't change the maxlen calculation, but makes it a bit clearer. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index a0905f04bd13..4d14dc8c7746 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -308,8 +308,8 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, if (ret) goto out_au; - maxlen = sizeof(*msg_a) + sizeof(msg_b) + - ceph_x_encrypt_buflen(ticket_blob_len); + maxlen = sizeof(*msg_a) + ticket_blob_len + + ceph_x_encrypt_buflen(sizeof(msg_b)); dout(" need len %d\n", maxlen); if (au->buf && au->buf->alloc_len < maxlen) { ceph_buffer_put(au->buf); @@ -350,11 +350,12 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, p, end - p); if (ret < 0) goto out_au; + p += ret; + WARN_ON(p > end); au->buf->vec.iov_len = p - au->buf->vec.iov_base; dout(" built authorizer nonce %llx len %d\n", au->nonce, (int)au->buf->vec.iov_len); - BUG_ON(au->buf->vec.iov_len > maxlen); return 0; out_au: -- GitLab From be60457612a22a040103bc98b1ebc72d95d4d6e0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:06 +0100 Subject: [PATCH 0789/1442] libceph: old_key in process_one_ticket() is redundant commit 462e650451c577d15eeb4d883d70fa9e4e529fad upstream. Since commit 0a990e709356 ("ceph: clean up service ticket decoding"), th->session_key isn't assigned until everything is decoded. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 4d14dc8c7746..28bde937d58a 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -148,7 +148,6 @@ static int process_one_ticket(struct ceph_auth_client *ac, int dlen; char is_enc; struct timespec validity; - struct ceph_crypto_key old_key; void *ticket_buf = NULL; void *tp, *tpend; void **ptp; @@ -187,7 +186,6 @@ static int process_one_ticket(struct ceph_auth_client *ac, if (tkt_struct_v != 1) goto bad; - memcpy(&old_key, &th->session_key, sizeof(old_key)); ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); if (ret) goto out; @@ -204,7 +202,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, if (is_enc) { /* encrypted */ dout(" encrypted ticket\n"); - dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0); + dlen = ceph_x_decrypt(&th->session_key, p, end, &ticket_buf, 0); if (dlen < 0) { ret = dlen; goto out; -- GitLab From 0548b82989384a58e749f0c04781b2c823c6c1b9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:07 +0100 Subject: [PATCH 0790/1442] libceph: introduce ceph_x_encrypt_offset() commit 55d9cc834f933698fc864f0d36f3cca533d30a8d upstream. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 28bde937d58a..de30c585c2a2 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -39,10 +39,14 @@ static int ceph_x_should_authenticate(struct ceph_auth_client *ac) return need != 0; } +static int ceph_x_encrypt_offset(void) +{ + return sizeof(u32) + sizeof(struct ceph_x_encrypt_header); +} + static int ceph_x_encrypt_buflen(int ilen) { - return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + - sizeof(u32); + return ceph_x_encrypt_offset() + ilen + 16; } static int ceph_x_encrypt(struct ceph_crypto_key *secret, -- GitLab From ecf7ced8562811576cd02158d6ae8e44c1fb4671 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:07 +0100 Subject: [PATCH 0791/1442] libceph: introduce ceph_crypt() for in-place en/decryption commit a45f795c65b479b4ba107b6ccde29b896d51ee98 upstream. Starting with 4.9, kernel stacks may be vmalloced and therefore not guaranteed to be physically contiguous; the new CONFIG_VMAP_STACK option is enabled by default on x86. This makes it invalid to use on-stack buffers with the crypto scatterlist API, as sg_set_buf() expects a logical address and won't work with vmalloced addresses. There isn't a different (e.g. kvec-based) crypto API we could switch net/ceph/crypto.c to and the current scatterlist.h API isn't getting updated to accommodate this use case. Allocating a new header and padding for each operation is a non-starter, so do the en/decryption in-place on a single pre-assembled (header + data + padding) heap buffer. This is explicitly supported by the crypto API: "... the caller may provide the same scatter/gather list for the plaintext and cipher text. After the completion of the cipher operation, the plaintext data is replaced with the ciphertext data in case of an encryption and vice versa for a decryption." Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/crypto.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++ net/ceph/crypto.h | 2 ++ 2 files changed, 89 insertions(+) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index db2847ac5f12..32099c5c4c75 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -526,6 +526,93 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } +static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, + void *buf, int buf_len, int in_len, int *pout_len) +{ + struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); + SKCIPHER_REQUEST_ON_STACK(req, tfm); + struct sg_table sgt; + struct scatterlist prealloc_sg; + char iv[AES_BLOCK_SIZE]; + int pad_byte = AES_BLOCK_SIZE - (in_len & (AES_BLOCK_SIZE - 1)); + int crypt_len = encrypt ? in_len + pad_byte : in_len; + int ret; + + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + WARN_ON(crypt_len > buf_len); + if (encrypt) + memset(buf + in_len, pad_byte, pad_byte); + ret = setup_sgtable(&sgt, &prealloc_sg, buf, crypt_len); + if (ret) + goto out_tfm; + + crypto_skcipher_setkey((void *)tfm, key->key, key->len); + memcpy(iv, aes_iv, AES_BLOCK_SIZE); + + skcipher_request_set_tfm(req, tfm); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv); + + /* + print_hex_dump(KERN_ERR, "key: ", DUMP_PREFIX_NONE, 16, 1, + key->key, key->len, 1); + print_hex_dump(KERN_ERR, " in: ", DUMP_PREFIX_NONE, 16, 1, + buf, crypt_len, 1); + */ + if (encrypt) + ret = crypto_skcipher_encrypt(req); + else + ret = crypto_skcipher_decrypt(req); + skcipher_request_zero(req); + if (ret) { + pr_err("%s %scrypt failed: %d\n", __func__, + encrypt ? "en" : "de", ret); + goto out_sgt; + } + /* + print_hex_dump(KERN_ERR, "out: ", DUMP_PREFIX_NONE, 16, 1, + buf, crypt_len, 1); + */ + + if (encrypt) { + *pout_len = crypt_len; + } else { + pad_byte = *(char *)(buf + in_len - 1); + if (pad_byte > 0 && pad_byte <= AES_BLOCK_SIZE && + in_len >= pad_byte) { + *pout_len = in_len - pad_byte; + } else { + pr_err("%s got bad padding %d on in_len %d\n", + __func__, pad_byte, in_len); + ret = -EPERM; + goto out_sgt; + } + } + +out_sgt: + teardown_sgtable(&sgt); +out_tfm: + crypto_free_skcipher(tfm); + return ret; +} + +int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, + void *buf, int buf_len, int in_len, int *pout_len) +{ + switch (key->type) { + case CEPH_CRYPTO_NONE: + *pout_len = in_len; + return 0; + case CEPH_CRYPTO_AES: + return ceph_aes_crypt(key, encrypt, buf, buf_len, in_len, + pout_len); + default: + return -ENOTSUPP; + } +} + static int ceph_key_preparse(struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 2e9cab09f37b..73da34e8c62e 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -43,6 +43,8 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, const void *src1, size_t src1_len, const void *src2, size_t src2_len); +int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, + void *buf, int buf_len, int in_len, int *pout_len); int ceph_crypto_init(void); void ceph_crypto_shutdown(void); -- GitLab From 788a0bbc70115d2964d1b708720b30fd9f91b6ae Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:07 +0100 Subject: [PATCH 0792/1442] libceph: rename and align ceph_x_authorizer::reply_buf commit 7882a26d2e2e520099e2961d5e2e870f8e4172dc upstream. It's going to be used as a temporary buffer for in-place en/decryption with ceph_crypt() instead of on-stack buffers, so rename to enc_buf. Ensure alignment to avoid GFP_ATOMIC allocations in the crypto stack. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 10 +++++----- net/ceph/auth_x.h | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index de30c585c2a2..ef1423294517 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -603,8 +603,8 @@ static int ceph_x_create_authorizer( auth->authorizer = (struct ceph_authorizer *) au; auth->authorizer_buf = au->buf->vec.iov_base; auth->authorizer_buf_len = au->buf->vec.iov_len; - auth->authorizer_reply_buf = au->reply_buf; - auth->authorizer_reply_buf_len = sizeof (au->reply_buf); + auth->authorizer_reply_buf = au->enc_buf; + auth->authorizer_reply_buf_len = CEPHX_AU_ENC_BUF_LEN; auth->sign_message = ac->ops->sign_message; auth->check_message_signature = ac->ops->check_message_signature; @@ -638,10 +638,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, int ret = 0; struct ceph_x_authorize_reply reply; void *preply = &reply; - void *p = au->reply_buf; - void *end = p + sizeof(au->reply_buf); + void *p = au->enc_buf; - ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply)); + ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN, + &preply, sizeof(reply)); if (ret < 0) return ret; if (ret != sizeof(reply)) diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 21a5af904bae..48e9ad41bd2a 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -24,6 +24,7 @@ struct ceph_x_ticket_handler { unsigned long renew_after, expires; }; +#define CEPHX_AU_ENC_BUF_LEN 128 /* big enough for encrypted blob */ struct ceph_x_authorizer { struct ceph_authorizer base; @@ -32,7 +33,7 @@ struct ceph_x_authorizer { unsigned int service; u64 nonce; u64 secret_id; - char reply_buf[128]; /* big enough for encrypted blob */ + char enc_buf[CEPHX_AU_ENC_BUF_LEN] __aligned(8); }; struct ceph_x_info { -- GitLab From 6e371f9a4144c1097a675ca482fc90cbcb0fb4e7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:07 +0100 Subject: [PATCH 0793/1442] libceph: tweak calcu_signature() a little commit 4eb4517ce7c9c573b6c823de403aeccb40018cfc upstream. - replace an ad-hoc array with a struct - rename to calc_signature() for consistency Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index ef1423294517..78c1675b0df7 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -707,35 +707,48 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH); } -static int calcu_signature(struct ceph_x_authorizer *au, - struct ceph_msg *msg, __le64 *sig) +static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, + __le64 *psig) { - int ret; char tmp_enc[40]; - __le32 tmp[5] = { - cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc, - msg->footer.middle_crc, msg->footer.data_crc, - }; - ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), + struct { + __le32 len; + __le32 header_crc; + __le32 front_crc; + __le32 middle_crc; + __le32 data_crc; + } __packed sigblock; + int ret; + + sigblock.len = cpu_to_le32(4*sizeof(u32)); + sigblock.header_crc = msg->hdr.crc; + sigblock.front_crc = msg->footer.front_crc; + sigblock.middle_crc = msg->footer.middle_crc; + sigblock.data_crc = msg->footer.data_crc; + ret = ceph_x_encrypt(&au->session_key, &sigblock, sizeof(sigblock), tmp_enc, sizeof(tmp_enc)); if (ret < 0) return ret; - *sig = *(__le64*)(tmp_enc + 4); + + *psig = *(__le64 *)(tmp_enc + sizeof(u32)); return 0; } static int ceph_x_sign_message(struct ceph_auth_handshake *auth, struct ceph_msg *msg) { + __le64 sig; int ret; if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) return 0; - ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, - msg, &msg->footer.sig); - if (ret < 0) + ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &sig); + if (ret) return ret; + + msg->footer.sig = sig; msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED; return 0; } @@ -749,9 +762,9 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth, if (ceph_test_opt(from_msgr(msg->con->msgr), NOMSGSIGN)) return 0; - ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer, - msg, &sig_check); - if (ret < 0) + ret = calc_signature((struct ceph_x_authorizer *)auth->authorizer, + msg, &sig_check); + if (ret) return ret; if (sig_check == msg->footer.sig) return 0; -- GitLab From 717a145bd5a92e3aae1dae1535b83ec38a85fa1b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:07 +0100 Subject: [PATCH 0794/1442] libceph: switch ceph_x_encrypt() to ceph_crypt() commit d03857c63bb036edff0aa7a107276360173aca4e upstream. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 71 ++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 78c1675b0df7..a13ce443073b 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -49,22 +49,24 @@ static int ceph_x_encrypt_buflen(int ilen) return ceph_x_encrypt_offset() + ilen + 16; } -static int ceph_x_encrypt(struct ceph_crypto_key *secret, - void *ibuf, int ilen, void *obuf, size_t olen) +static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, + int buf_len, int plaintext_len) { - struct ceph_x_encrypt_header head = { - .struct_v = 1, - .magic = cpu_to_le64(CEPHX_ENC_MAGIC) - }; - size_t len = olen - sizeof(u32); + struct ceph_x_encrypt_header *hdr = buf + sizeof(u32); + int ciphertext_len; int ret; - ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len, - &head, sizeof(head), ibuf, ilen); + hdr->struct_v = 1; + hdr->magic = cpu_to_le64(CEPHX_ENC_MAGIC); + + ret = ceph_crypt(secret, true, buf + sizeof(u32), buf_len - sizeof(u32), + plaintext_len + sizeof(struct ceph_x_encrypt_header), + &ciphertext_len); if (ret) return ret; - ceph_encode_32(&obuf, len); - return len + sizeof(u32); + + ceph_encode_32(&buf, ciphertext_len); + return sizeof(u32) + ciphertext_len; } static int ceph_x_decrypt(struct ceph_crypto_key *secret, @@ -296,7 +298,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, { int maxlen; struct ceph_x_authorize_a *msg_a; - struct ceph_x_authorize_b msg_b; + struct ceph_x_authorize_b *msg_b; void *p, *end; int ret; int ticket_blob_len = @@ -311,7 +313,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, goto out_au; maxlen = sizeof(*msg_a) + ticket_blob_len + - ceph_x_encrypt_buflen(sizeof(msg_b)); + ceph_x_encrypt_buflen(sizeof(*msg_b)); dout(" need len %d\n", maxlen); if (au->buf && au->buf->alloc_len < maxlen) { ceph_buffer_put(au->buf); @@ -345,11 +347,11 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, p += ticket_blob_len; end = au->buf->vec.iov_base + au->buf->vec.iov_len; + msg_b = p + ceph_x_encrypt_offset(); + msg_b->struct_v = 1; get_random_bytes(&au->nonce, sizeof(au->nonce)); - msg_b.struct_v = 1; - msg_b.nonce = cpu_to_le64(au->nonce); - ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b), - p, end - p); + msg_b->nonce = cpu_to_le64(au->nonce); + ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); if (ret < 0) goto out_au; @@ -455,8 +457,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, if (need & CEPH_ENTITY_TYPE_AUTH) { struct ceph_x_authenticate *auth = (void *)(head + 1); void *p = auth + 1; - struct ceph_x_challenge_blob tmp; - char tmp_enc[40]; + void *enc_buf = xi->auth_authorizer.enc_buf; + struct ceph_x_challenge_blob *blob = enc_buf + + ceph_x_encrypt_offset(); u64 *u; if (p > end) @@ -467,16 +470,16 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, /* encrypt and hash */ get_random_bytes(&auth->client_challenge, sizeof(u64)); - tmp.client_challenge = auth->client_challenge; - tmp.server_challenge = cpu_to_le64(xi->server_challenge); - ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp), - tmp_enc, sizeof(tmp_enc)); + blob->client_challenge = auth->client_challenge; + blob->server_challenge = cpu_to_le64(xi->server_challenge); + ret = ceph_x_encrypt(&xi->secret, enc_buf, CEPHX_AU_ENC_BUF_LEN, + sizeof(*blob)); if (ret < 0) return ret; auth->struct_v = 1; auth->key = 0; - for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) + for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++) auth->key ^= *(__le64 *)u; dout(" server_challenge %llx client_challenge %llx key %llx\n", xi->server_challenge, le64_to_cpu(auth->client_challenge), @@ -710,27 +713,27 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg, __le64 *psig) { - char tmp_enc[40]; + void *enc_buf = au->enc_buf; struct { __le32 len; __le32 header_crc; __le32 front_crc; __le32 middle_crc; __le32 data_crc; - } __packed sigblock; + } __packed *sigblock = enc_buf + ceph_x_encrypt_offset(); int ret; - sigblock.len = cpu_to_le32(4*sizeof(u32)); - sigblock.header_crc = msg->hdr.crc; - sigblock.front_crc = msg->footer.front_crc; - sigblock.middle_crc = msg->footer.middle_crc; - sigblock.data_crc = msg->footer.data_crc; - ret = ceph_x_encrypt(&au->session_key, &sigblock, sizeof(sigblock), - tmp_enc, sizeof(tmp_enc)); + sigblock->len = cpu_to_le32(4*sizeof(u32)); + sigblock->header_crc = msg->hdr.crc; + sigblock->front_crc = msg->footer.front_crc; + sigblock->middle_crc = msg->footer.middle_crc; + sigblock->data_crc = msg->footer.data_crc; + ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN, + sizeof(*sigblock)); if (ret < 0) return ret; - *psig = *(__le64 *)(tmp_enc + sizeof(u32)); + *psig = *(__le64 *)(enc_buf + sizeof(u32)); return 0; } -- GitLab From 2982b9c92a66604ffb9fb2db54cf735133d1ef56 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:08 +0100 Subject: [PATCH 0795/1442] libceph: switch ceph_x_decrypt() to ceph_crypt() commit e15fd0a11db00fc7f470a9fc804657ec3f6d04a5 upstream. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/auth_x.c | 78 +++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index a13ce443073b..b216131915e7 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -69,32 +69,28 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf, return sizeof(u32) + ciphertext_len; } -static int ceph_x_decrypt(struct ceph_crypto_key *secret, - void **p, void *end, void **obuf, size_t olen) +static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end) { - struct ceph_x_encrypt_header head; - size_t head_len = sizeof(head); - int len, ret; - - len = ceph_decode_32(p); - if (*p + len > end) - return -EINVAL; + struct ceph_x_encrypt_header *hdr = *p + sizeof(u32); + int ciphertext_len, plaintext_len; + int ret; - dout("ceph_x_decrypt len %d\n", len); - if (*obuf == NULL) { - *obuf = kmalloc(len, GFP_NOFS); - if (!*obuf) - return -ENOMEM; - olen = len; - } + ceph_decode_32_safe(p, end, ciphertext_len, e_inval); + ceph_decode_need(p, end, ciphertext_len, e_inval); - ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len); + ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len, + &plaintext_len); if (ret) return ret; - if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) + + if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) return -EPERM; - *p += len; - return olen; + + *p += ciphertext_len; + return plaintext_len - sizeof(struct ceph_x_encrypt_header); + +e_inval: + return -EINVAL; } /* @@ -149,12 +145,10 @@ static int process_one_ticket(struct ceph_auth_client *ac, int type; u8 tkt_struct_v, blob_struct_v; struct ceph_x_ticket_handler *th; - void *dbuf = NULL; void *dp, *dend; int dlen; char is_enc; struct timespec validity; - void *ticket_buf = NULL; void *tp, *tpend; void **ptp; struct ceph_crypto_key new_session_key; @@ -179,14 +173,12 @@ static int process_one_ticket(struct ceph_auth_client *ac, } /* blob for me */ - dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0); - if (dlen <= 0) { - ret = dlen; + dp = *p + ceph_x_encrypt_offset(); + ret = ceph_x_decrypt(secret, p, end); + if (ret < 0) goto out; - } - dout(" decrypted %d bytes\n", dlen); - dp = dbuf; - dend = dp + dlen; + dout(" decrypted %d bytes\n", ret); + dend = dp + ret; tkt_struct_v = ceph_decode_8(&dp); if (tkt_struct_v != 1) @@ -207,15 +199,13 @@ static int process_one_ticket(struct ceph_auth_client *ac, ceph_decode_8_safe(p, end, is_enc, bad); if (is_enc) { /* encrypted */ - dout(" encrypted ticket\n"); - dlen = ceph_x_decrypt(&th->session_key, p, end, &ticket_buf, 0); - if (dlen < 0) { - ret = dlen; + tp = *p + ceph_x_encrypt_offset(); + ret = ceph_x_decrypt(&th->session_key, p, end); + if (ret < 0) goto out; - } - tp = ticket_buf; + dout(" encrypted ticket, decrypted %d bytes\n", ret); ptp = &tp; - tpend = *ptp + dlen; + tpend = tp + ret; } else { /* unencrypted */ ptp = p; @@ -246,8 +236,6 @@ static int process_one_ticket(struct ceph_auth_client *ac, xi->have_keys |= th->service; out: - kfree(ticket_buf); - kfree(dbuf); return ret; bad: @@ -638,24 +626,22 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len) { struct ceph_x_authorizer *au = (void *)a; - int ret = 0; - struct ceph_x_authorize_reply reply; - void *preply = &reply; void *p = au->enc_buf; + struct ceph_x_authorize_reply *reply = p + ceph_x_encrypt_offset(); + int ret; - ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN, - &preply, sizeof(reply)); + ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); if (ret < 0) return ret; - if (ret != sizeof(reply)) + if (ret != sizeof(*reply)) return -EPERM; - if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one)) + if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) ret = -EPERM; else ret = 0; dout("verify_authorizer_reply nonce %llx got %llx ret %d\n", - au->nonce, le64_to_cpu(reply.nonce_plus_one), ret); + au->nonce, le64_to_cpu(reply->nonce_plus_one), ret); return ret; } -- GitLab From b8add6715c9ada1619e6db6d4e106c03df94267c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:08 +0100 Subject: [PATCH 0796/1442] libceph: remove now unused ceph_*{en,de}crypt*() functions commit 2b1e1a7cd0a615d57455567a549f9965023321b5 upstream. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/crypto.c | 369 ---------------------------------------------- net/ceph/crypto.h | 14 -- 2 files changed, 383 deletions(-) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 32099c5c4c75..38936e1fd644 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -157,375 +157,6 @@ static void teardown_sgtable(struct sg_table *sgt) sg_free_table(sgt); } -static int ceph_aes_encrypt(const void *key, int key_len, - void *dst, size_t *dst_len, - const void *src, size_t src_len) -{ - struct scatterlist sg_in[2], prealloc_sg; - struct sg_table sg_out; - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); - int ret; - char iv[AES_BLOCK_SIZE]; - size_t zero_padding = (0x10 - (src_len & 0x0f)); - char pad[16]; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - memset(pad, zero_padding, zero_padding); - - *dst_len = src_len + zero_padding; - - sg_init_table(sg_in, 2); - sg_set_buf(&sg_in[0], src, src_len); - sg_set_buf(&sg_in[1], pad, zero_padding); - ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); - if (ret) - goto out_tfm; - - crypto_skcipher_setkey((void *)tfm, key, key_len); - memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg_in, sg_out.sgl, - src_len + zero_padding, iv); - - /* - print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, - key, key_len, 1); - print_hex_dump(KERN_ERR, "enc src: ", DUMP_PREFIX_NONE, 16, 1, - src, src_len, 1); - print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, - pad, zero_padding, 1); - */ - ret = crypto_skcipher_encrypt(req); - skcipher_request_zero(req); - if (ret < 0) { - pr_err("ceph_aes_crypt failed %d\n", ret); - goto out_sg; - } - /* - print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, - dst, *dst_len, 1); - */ - -out_sg: - teardown_sgtable(&sg_out); -out_tfm: - crypto_free_skcipher(tfm); - return ret; -} - -static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, - size_t *dst_len, - const void *src1, size_t src1_len, - const void *src2, size_t src2_len) -{ - struct scatterlist sg_in[3], prealloc_sg; - struct sg_table sg_out; - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); - int ret; - char iv[AES_BLOCK_SIZE]; - size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f)); - char pad[16]; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - memset(pad, zero_padding, zero_padding); - - *dst_len = src1_len + src2_len + zero_padding; - - sg_init_table(sg_in, 3); - sg_set_buf(&sg_in[0], src1, src1_len); - sg_set_buf(&sg_in[1], src2, src2_len); - sg_set_buf(&sg_in[2], pad, zero_padding); - ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); - if (ret) - goto out_tfm; - - crypto_skcipher_setkey((void *)tfm, key, key_len); - memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg_in, sg_out.sgl, - src1_len + src2_len + zero_padding, iv); - - /* - print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, - key, key_len, 1); - print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1, - src1, src1_len, 1); - print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1, - src2, src2_len, 1); - print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, - pad, zero_padding, 1); - */ - ret = crypto_skcipher_encrypt(req); - skcipher_request_zero(req); - if (ret < 0) { - pr_err("ceph_aes_crypt2 failed %d\n", ret); - goto out_sg; - } - /* - print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, - dst, *dst_len, 1); - */ - -out_sg: - teardown_sgtable(&sg_out); -out_tfm: - crypto_free_skcipher(tfm); - return ret; -} - -static int ceph_aes_decrypt(const void *key, int key_len, - void *dst, size_t *dst_len, - const void *src, size_t src_len) -{ - struct sg_table sg_in; - struct scatterlist sg_out[2], prealloc_sg; - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); - char pad[16]; - char iv[AES_BLOCK_SIZE]; - int ret; - int last_byte; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - sg_init_table(sg_out, 2); - sg_set_buf(&sg_out[0], dst, *dst_len); - sg_set_buf(&sg_out[1], pad, sizeof(pad)); - ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); - if (ret) - goto out_tfm; - - crypto_skcipher_setkey((void *)tfm, key, key_len); - memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg_in.sgl, sg_out, - src_len, iv); - - /* - print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, - key, key_len, 1); - print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, - src, src_len, 1); - */ - ret = crypto_skcipher_decrypt(req); - skcipher_request_zero(req); - if (ret < 0) { - pr_err("ceph_aes_decrypt failed %d\n", ret); - goto out_sg; - } - - if (src_len <= *dst_len) - last_byte = ((char *)dst)[src_len - 1]; - else - last_byte = pad[src_len - *dst_len - 1]; - if (last_byte <= 16 && src_len >= last_byte) { - *dst_len = src_len - last_byte; - } else { - pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", - last_byte, (int)src_len); - return -EPERM; /* bad padding */ - } - /* - print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, - dst, *dst_len, 1); - */ - -out_sg: - teardown_sgtable(&sg_in); -out_tfm: - crypto_free_skcipher(tfm); - return ret; -} - -static int ceph_aes_decrypt2(const void *key, int key_len, - void *dst1, size_t *dst1_len, - void *dst2, size_t *dst2_len, - const void *src, size_t src_len) -{ - struct sg_table sg_in; - struct scatterlist sg_out[3], prealloc_sg; - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); - char pad[16]; - char iv[AES_BLOCK_SIZE]; - int ret; - int last_byte; - - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - sg_init_table(sg_out, 3); - sg_set_buf(&sg_out[0], dst1, *dst1_len); - sg_set_buf(&sg_out[1], dst2, *dst2_len); - sg_set_buf(&sg_out[2], pad, sizeof(pad)); - ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); - if (ret) - goto out_tfm; - - crypto_skcipher_setkey((void *)tfm, key, key_len); - memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg_in.sgl, sg_out, - src_len, iv); - - /* - print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, - key, key_len, 1); - print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, - src, src_len, 1); - */ - ret = crypto_skcipher_decrypt(req); - skcipher_request_zero(req); - if (ret < 0) { - pr_err("ceph_aes_decrypt failed %d\n", ret); - goto out_sg; - } - - if (src_len <= *dst1_len) - last_byte = ((char *)dst1)[src_len - 1]; - else if (src_len <= *dst1_len + *dst2_len) - last_byte = ((char *)dst2)[src_len - *dst1_len - 1]; - else - last_byte = pad[src_len - *dst1_len - *dst2_len - 1]; - if (last_byte <= 16 && src_len >= last_byte) { - src_len -= last_byte; - } else { - pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", - last_byte, (int)src_len); - return -EPERM; /* bad padding */ - } - - if (src_len < *dst1_len) { - *dst1_len = src_len; - *dst2_len = 0; - } else { - *dst2_len = src_len - *dst1_len; - } - /* - print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1, - dst1, *dst1_len, 1); - print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1, - dst2, *dst2_len, 1); - */ - -out_sg: - teardown_sgtable(&sg_in); -out_tfm: - crypto_free_skcipher(tfm); - return ret; -} - - -int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, - const void *src, size_t src_len) -{ - switch (secret->type) { - case CEPH_CRYPTO_NONE: - if (*dst_len < src_len) - return -ERANGE; - memcpy(dst, src, src_len); - *dst_len = src_len; - return 0; - - case CEPH_CRYPTO_AES: - return ceph_aes_decrypt(secret->key, secret->len, dst, - dst_len, src, src_len); - - default: - return -EINVAL; - } -} - -int ceph_decrypt2(struct ceph_crypto_key *secret, - void *dst1, size_t *dst1_len, - void *dst2, size_t *dst2_len, - const void *src, size_t src_len) -{ - size_t t; - - switch (secret->type) { - case CEPH_CRYPTO_NONE: - if (*dst1_len + *dst2_len < src_len) - return -ERANGE; - t = min(*dst1_len, src_len); - memcpy(dst1, src, t); - *dst1_len = t; - src += t; - src_len -= t; - if (src_len) { - t = min(*dst2_len, src_len); - memcpy(dst2, src, t); - *dst2_len = t; - } - return 0; - - case CEPH_CRYPTO_AES: - return ceph_aes_decrypt2(secret->key, secret->len, - dst1, dst1_len, dst2, dst2_len, - src, src_len); - - default: - return -EINVAL; - } -} - -int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, - const void *src, size_t src_len) -{ - switch (secret->type) { - case CEPH_CRYPTO_NONE: - if (*dst_len < src_len) - return -ERANGE; - memcpy(dst, src, src_len); - *dst_len = src_len; - return 0; - - case CEPH_CRYPTO_AES: - return ceph_aes_encrypt(secret->key, secret->len, dst, - dst_len, src, src_len); - - default: - return -EINVAL; - } -} - -int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, - const void *src1, size_t src1_len, - const void *src2, size_t src2_len) -{ - switch (secret->type) { - case CEPH_CRYPTO_NONE: - if (*dst_len < src1_len + src2_len) - return -ERANGE; - memcpy(dst, src1, src1_len); - memcpy(dst + src1_len, src2, src2_len); - *dst_len = src1_len + src2_len; - return 0; - - case CEPH_CRYPTO_AES: - return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len, - src1, src1_len, src2, src2_len); - - default: - return -EINVAL; - } -} - static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len) { diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 73da34e8c62e..c33bcafd3829 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -29,20 +29,6 @@ int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end); int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); /* crypto.c */ -int ceph_decrypt(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src, size_t src_len); -int ceph_encrypt(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src, size_t src_len); -int ceph_decrypt2(struct ceph_crypto_key *secret, - void *dst1, size_t *dst1_len, - void *dst2, size_t *dst2_len, - const void *src, size_t src_len); -int ceph_encrypt2(struct ceph_crypto_key *secret, - void *dst, size_t *dst_len, - const void *src1, size_t src1_len, - const void *src2, size_t src2_len); int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len); int ceph_crypto_init(void); -- GitLab From d4f12aa133dbac336925ce7c1929b141819dcebf Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:41 -0300 Subject: [PATCH 0797/1442] ARM: dts: dra7: Add an empty chosen node to top level DTSI commit 7f6c857b12911ed56b2056f9d5491e16b5fc95ea upstream. Commit 55871eb6e2cc ("ARM: dts: dra7: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: 55871eb6e2cc ("ARM: dts: dra7: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dra7.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index ef888295aa7b..064d84f87e45 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -18,6 +18,7 @@ compatible = "ti,dra7xx"; interrupt-parent = <&crossbar_mpu>; + chosen { }; aliases { i2c0 = &i2c1; -- GitLab From ab6dc01db1f7df8957e451af93ece9327135bbac Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:40 -0300 Subject: [PATCH 0798/1442] ARM: dts: dm816x: Add an empty chosen node to top level DTSI commit 6ed80b3a232e61da6d0189bbbe2b2b9afaefe3b3 upstream. Commit 06bfb9c19957 ("ARM: dts: dm816x: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: 06bfb9c19957 ("ARM: dts: dm816x: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dm816x.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index f1e0f771ff29..cbdfbc4e4a26 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -12,6 +12,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; -- GitLab From 3e1c70972204add48a338ee9170a6c0358304e9a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:39 -0300 Subject: [PATCH 0799/1442] ARM: dts: dm814x: Add an empty chosen node to top level DTSI commit 9536fd30d41ae4f30d04762676e5f5f602e16aa8 upstream. Commit 76155b378c59 ("ARM: dts: dm814x: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: 76155b378c59 ("ARM: dts: dm814x: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/dm814x.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi index ff90a6ce6bdc..d87efab24fa2 100644 --- a/arch/arm/boot/dts/dm814x.dtsi +++ b/arch/arm/boot/dts/dm814x.dtsi @@ -12,6 +12,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; -- GitLab From 355a8fced2bf8d0c242b8a9baffa7f07abc6ca12 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:37 -0300 Subject: [PATCH 0800/1442] ARM: dts: am33xx: Add an empty chosen node to top level DTSI commit 1d8d6d3f2f7d553c479f24ab93767974a8c2dfad upstream. Commit f8bf01611c99 ("ARM: dts: am33xx: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: f8bf01611c99 ("ARM: dts: am33xx: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am33xx.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 194d884c9de1..795c1467fa50 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -16,6 +16,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c0; -- GitLab From 835bf872d924a8442ef6d9c8fb078a5eed48c8a1 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:35 -0300 Subject: [PATCH 0801/1442] ARM: dts: omap4: Add an empty chosen node to top level DTSI commit 6c565d1a63ce241a0100f5d327c48dde87b4df76 upstream. Commit da6269e7e3dd ("ARM: dts: omap4: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: da6269e7e3dd ("ARM: dts: omap4: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap4.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 0ced079b7ae3..9c289ddab3df 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -15,6 +15,7 @@ interrupt-parent = <&wakeupgen>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; -- GitLab From c3f7ca43b2d4eaf797d11d815326f78d092371ea Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:36 -0300 Subject: [PATCH 0802/1442] ARM: dts: omap5: Add an empty chosen node to top level DTSI commit c9faa84cb9c34852ad70cb175457ae21fc06f39b upstream. Commit 76a8548ea987 ("ARM: dts: omap5: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: 76a8548ea987 ("ARM: dts: omap5: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap5.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index f8aa0cbc5f01..1d1d8e90cd80 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -17,6 +17,7 @@ compatible = "ti,omap5"; interrupt-parent = <&wakeupgen>; + chosen { }; aliases { i2c0 = &i2c1; -- GitLab From bec062cd47bd0184ec88a80c06f695acae8999ff Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:38 -0300 Subject: [PATCH 0803/1442] ARM: dts: am4372: Add an empty chosen node to top level DTSI commit ce95077d0cdfcc8e40dea10a1680249831ccec77 upstream. Commit 75813028bbd7 ("ARM: dts: am4372: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: 75813028bbd7 ("ARM: dts: am4372: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/am4372.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index a275fa956813..a20a71d9d22e 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -16,6 +16,7 @@ interrupt-parent = <&wakeupgen>; #address-cells = <1>; #size-cells = <1>; + chosen { }; memory@0 { device_type = "memory"; -- GitLab From 5921b26bf7445591055bbe385b91d2ea866e7278 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:34 -0300 Subject: [PATCH 0804/1442] ARM: dts: omap3: Add an empty chosen node to top level DTSI commit 23ab4c6183ac0679d80888b5c4cc1d528fcc21c2 upstream. Commit 008a2ebcd677 ("ARM: dts: omap3: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: 008a2ebcd677 ("ARM: dts: omap3: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap3.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 353d818ce5a6..2008648b8c9f 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -17,6 +17,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { i2c0 = &i2c1; -- GitLab From cfcb94b3a498108364db1ad1c36742e86d0c5c4c Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 19 Dec 2016 11:44:33 -0300 Subject: [PATCH 0805/1442] ARM: dts: omap2: Add an empty chosen node to top level DTSI commit 3d37d41a148c32389ed360e10a9f8a7cd37ce166 upstream. Commit d1f3156fc8c7 ("ARM: dts: omap2: Remove skeleton.dtsi usage") removed the skeleton.dtsi usage since we want to get rid of it. But this can cause issues when booting a kernel with a boot-loader that doesn't create a chosen node if this isn't present in the DTB since the decompressor relies on a pre-existing chosen node to be available to insert the command line and merge other ATAGS info. Fixes: d1f3156fc8c7 ("ARM: dts: omap2: Remove skeleton.dtsi usage") Reported-by: Pali Rohar Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap2.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap2.dtsi b/arch/arm/boot/dts/omap2.dtsi index 4f793a025a72..f1d6de8b3c19 100644 --- a/arch/arm/boot/dts/omap2.dtsi +++ b/arch/arm/boot/dts/omap2.dtsi @@ -17,6 +17,7 @@ interrupt-parent = <&intc>; #address-cells = <1>; #size-cells = <1>; + chosen { }; aliases { serial0 = &uart1; -- GitLab From a075ac9c0a40f20ad14f052ee34e88922703ed20 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Tue, 3 Jan 2017 12:22:46 +0100 Subject: [PATCH 0806/1442] ARM: dts: imx6qdl-nitrogen6_max: fix sgtl5000 pinctrl init commit 6ab5c2b662e2dcbb964099bf7f19e9dbc9ae5a41 upstream. This patch fixes the following error: sgtl5000 0-000a: Error reading chip id -6 imx-sgtl5000 sound: ASoC: CODEC DAI sgtl5000 not registered imx-sgtl5000 sound: snd_soc_register_card failed (-517) The problem was that the pinctrl group was linked to the sound driver instead of the codec node. Since the codec is probed first, the sys_mclk was missing and it would therefore fail to initialize. Fixes: b32e700256bc ("ARM: dts: imx: add Boundary Devices Nitrogen6_Max board") Signed-off-by: Gary Bisson Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi index b0b3220a1fd9..01166ba36f27 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi @@ -319,8 +319,6 @@ compatible = "fsl,imx6q-nitrogen6_max-sgtl5000", "fsl,imx-audio-sgtl5000"; model = "imx6q-nitrogen6_max-sgtl5000"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_sgtl5000>; ssi-controller = <&ssi1>; audio-codec = <&codec>; audio-routing = @@ -402,6 +400,8 @@ codec: sgtl5000@0a { compatible = "fsl,sgtl5000"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_sgtl5000>; reg = <0x0a>; clocks = <&clks IMX6QDL_CLK_CKO>; VDDA-supply = <®_2p5v>; -- GitLab From cd9601caa2fa45e768ec6bf1fa1375bbb566466f Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 17 Jan 2017 13:34:06 -0600 Subject: [PATCH 0807/1442] ARM: dts: omap3: Fix Card Detect and Write Protect on Logic PD SOM-LV commit 1ea6af3216b092ec97129ac81bd95cf254c4b140 upstream. This fixes commit ab8dd3aed011 ("ARM: DTS: Add minimal Support for Logic PD DM3730 SOM-LV") where the Card Detect and Write Protect pins were improperly configured. Fixes: ab8dd3aed011 ("ARM: DTS: Add minimal Support for Logic PD DM3730 SOM-LV") Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts index da8598402ab8..38faa90007d7 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts @@ -158,7 +158,7 @@ &mmc1 { interrupts-extended = <&intc 83 &omap3_pmx_core 0x11a>; pinctrl-names = "default"; - pinctrl-0 = <&mmc1_pins &mmc1_cd>; + pinctrl-0 = <&mmc1_pins>; wp-gpios = <&gpio4 30 GPIO_ACTIVE_HIGH>; /* gpio_126 */ cd-gpios = <&gpio4 14 IRQ_TYPE_LEVEL_LOW>; /* gpio_110 */ vmmc-supply = <&vmmc1>; @@ -193,7 +193,8 @@ OMAP3_CORE1_IOPAD(0x214a, PIN_INPUT | MUX_MODE0) /* sdmmc1_dat1.sdmmc1_dat1 */ OMAP3_CORE1_IOPAD(0x214c, PIN_INPUT | MUX_MODE0) /* sdmmc1_dat2.sdmmc1_dat2 */ OMAP3_CORE1_IOPAD(0x214e, PIN_INPUT | MUX_MODE0) /* sdmmc1_dat3.sdmmc1_dat3 */ - OMAP3_CORE1_IOPAD(0x2132, PIN_INPUT_PULLUP | MUX_MODE4) /* cam_strobe.gpio_126 sdmmc1_wp*/ + OMAP3_CORE1_IOPAD(0x2132, PIN_INPUT_PULLUP | MUX_MODE4) /* cam_strobe.gpio_126 */ + OMAP3_CORE1_IOPAD(0x212c, PIN_INPUT_PULLUP | MUX_MODE4) /* cam_d11.gpio_110 */ >; }; @@ -242,12 +243,6 @@ OMAP3_WKUP_IOPAD(0x2a16, PIN_OUTPUT | PIN_OFF_OUTPUT_LOW | MUX_MODE4) /* sys_boot6.gpio_8 */ >; }; - - mmc1_cd: pinmux_mmc1_cd { - pinctrl-single,pins = < - OMAP3_WKUP_IOPAD(0x212c, PIN_INPUT_PULLUP | MUX_MODE4) /* cam_d11.gpio_110 */ - >; - }; }; -- GitLab From dd8334a5e17e291f503cada56a0d6aed3ddb3c12 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Nov 2016 16:20:37 +0100 Subject: [PATCH 0808/1442] ARM: ux500: fix prcmu_is_cpu_in_wfi() calculation commit f0e8faa7a5e894b0fc99d24be1b18685a92ea466 upstream. This function clearly never worked and always returns true, as pointed out by gcc-7: arch/arm/mach-ux500/pm.c: In function 'prcmu_is_cpu_in_wfi': arch/arm/mach-ux500/pm.c:137:212: error: ?: using integer constants in boolean context, the expression will always evaluate to 'true' [-Werror=int-in-bool-context] With the added braces, the condition actually makes sense. Fixes: 34fe6f107eab ("mfd : Check if the other db8500 core is in WFI") Signed-off-by: Arnd Bergmann Acked-by: Daniel Lezcano Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-ux500/pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-ux500/pm.c b/arch/arm/mach-ux500/pm.c index 8538910db202..a970e7fcba9e 100644 --- a/arch/arm/mach-ux500/pm.c +++ b/arch/arm/mach-ux500/pm.c @@ -134,8 +134,8 @@ bool prcmu_pending_irq(void) */ bool prcmu_is_cpu_in_wfi(int cpu) { - return readl(PRCM_ARM_WFI_STANDBY) & cpu ? PRCM_ARM_WFI_STANDBY_WFI1 : - PRCM_ARM_WFI_STANDBY_WFI0; + return readl(PRCM_ARM_WFI_STANDBY) & + (cpu ? PRCM_ARM_WFI_STANDBY_WFI1 : PRCM_ARM_WFI_STANDBY_WFI0); } /* -- GitLab From 04c9fe63166fa9a3004e770c36ec1eb7cdfd5465 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 13 Sep 2016 12:31:17 +0100 Subject: [PATCH 0809/1442] ARM: 8613/1: Fix the uaccess crash on PB11MPCore commit 90f92c631b210c1e97080b53a9d863783281a932 upstream. The following patch was sketched by Russell in response to my crashes on the PB11MPCore after the patch for software-based priviledged no access support for ARMv8.1. See this thread: http://marc.info/?l=linux-arm-kernel&m=144051749807214&w=2 I am unsure what is going on, I suspect everyone involved in the discussion is. I just want to repost this to get the discussion restarted, as I still have to apply this patch with every kernel iteration to get my PB11MPCore Realview running. Testing by Neil Armstrong on the Oxnas NAS has revealed that this bug exist also on that widely deployed hardware, so we are probably currently regressing all ARM11MPCore systems. Cc: Russell King Cc: Will Deacon Fixes: a5e090acbf54 ("ARM: software-based priviledged-no-access support") Tested-by: Neil Armstrong Signed-off-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/smp_tlb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 22313cb53362..9af0701f7094 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include @@ -40,8 +41,11 @@ static inline void ipi_flush_tlb_mm(void *arg) static inline void ipi_flush_tlb_page(void *arg) { struct tlb_args *ta = (struct tlb_args *)arg; + unsigned int __ua_flags = uaccess_save_and_enable(); local_flush_tlb_page(ta->ta_vma, ta->ta_start); + + uaccess_restore(__ua_flags); } static inline void ipi_flush_tlb_kernel_page(void *arg) @@ -54,8 +58,11 @@ static inline void ipi_flush_tlb_kernel_page(void *arg) static inline void ipi_flush_tlb_range(void *arg) { struct tlb_args *ta = (struct tlb_args *)arg; + unsigned int __ua_flags = uaccess_save_and_enable(); local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); + + uaccess_restore(__ua_flags); } static inline void ipi_flush_tlb_kernel_range(void *arg) -- GitLab From 48baa924108e124bc5221ca73b72562a8479c11c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 11 Oct 2016 12:04:09 +0300 Subject: [PATCH 0810/1442] ceph: fix scheduler warning due to nested blocking commit 5c341ee32881c554727ec14b71ec3e8832f01989 upstream. try_get_cap_refs can be used as a condition in a wait_event* calls. This is all fine until it has to call __ceph_do_pending_vmtruncate, which in turn acquires the i_truncate_mutex. This leads to a situation in which a task's state is !TASK_RUNNING and at the same time it's trying to acquire a sleeping primitive. In essence a nested sleeping primitives are being used. This causes the following warning: WARNING: CPU: 22 PID: 11064 at kernel/sched/core.c:7631 __might_sleep+0x9f/0xb0() do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait_event+0x5d/0x110 ipmi_msghandler tcp_scalable ib_qib dca ib_mad ib_core ib_addr ipv6 CPU: 22 PID: 11064 Comm: fs_checker.pl Tainted: G O 4.4.20-clouder2 #6 Hardware name: Supermicro X10DRi/X10DRi, BIOS 1.1a 10/16/2015 0000000000000000 ffff8838b416fa88 ffffffff812f4409 ffff8838b416fad0 ffffffff81a034f2 ffff8838b416fac0 ffffffff81052b46 ffffffff81a0432c 0000000000000061 0000000000000000 0000000000000000 ffff88167bda54a0 Call Trace: [] dump_stack+0x67/0x9e [] warn_slowpath_common+0x86/0xc0 [] warn_slowpath_fmt+0x4c/0x50 [] ? prepare_to_wait_event+0x5d/0x110 [] ? prepare_to_wait_event+0x5d/0x110 [] __might_sleep+0x9f/0xb0 [] mutex_lock+0x20/0x40 [] __ceph_do_pending_vmtruncate+0x44/0x1a0 [ceph] [] try_get_cap_refs+0xa2/0x320 [ceph] [] ceph_get_caps+0x255/0x2b0 [ceph] [] ? wait_woken+0xb0/0xb0 [] ceph_write_iter+0x2b1/0xde0 [ceph] [] ? schedule_timeout+0x202/0x260 [] ? kmem_cache_free+0x1ea/0x200 [] ? iput+0x9e/0x230 [] ? __might_sleep+0x52/0xb0 [] ? __might_fault+0x37/0x40 [] ? cp_new_stat+0x153/0x170 [] __vfs_write+0xaa/0xe0 [] vfs_write+0xa9/0x190 [] ? set_close_on_exec+0x31/0x70 [] SyS_write+0x46/0xa0 This happens since wait_event_interruptible can interfere with the mutex locking code, since they both fiddle with the task state. Fix the issue by using the newly-added nested blocking infrastructure in 61ada528dea0 ("sched/wait: Provide infrastructure to deal with nested blocking") Link: https://lwn.net/Articles/628628/ Signed-off-by: Nikolay Borisov Signed-off-by: Yan, Zheng Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 16e6ded0b7f2..4037b389a7e9 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2507,9 +2507,15 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, if (err < 0) ret = err; } else { - ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, endoff, - true, &_got, &err)); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(&ci->i_cap_wq, &wait); + + while (!try_get_cap_refs(ci, need, want, endoff, + true, &_got, &err)) + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + + remove_wait_queue(&ci->i_cap_wq, &wait); + if (err == -EAGAIN) continue; if (err < 0) -- GitLab From 8934e069674a09c4ce4a0858e1aee5474784df92 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 22 Dec 2016 16:05:43 +0800 Subject: [PATCH 0811/1442] ceph: fix ceph_get_caps() interruption commit 6e09d0fb64402cec579f029ca4c7f39f5c48fc60 upstream. Commit 5c341ee32881 ("ceph: fix scheduler warning due to nested blocking") causes infinite loop when process is interrupted. Fix it. Signed-off-by: Yan, Zheng Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 4037b389a7e9..f3f21105b860 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2511,8 +2511,13 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, add_wait_queue(&ci->i_cap_wq, &wait); while (!try_get_cap_refs(ci, need, want, endoff, - true, &_got, &err)) + true, &_got, &err)) { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + } remove_wait_queue(&ci->i_cap_wq, &wait); -- GitLab From 2e4f2131b66ffe64ef90d1e85223ef347e355b65 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 12 Jan 2017 14:42:38 -0500 Subject: [PATCH 0812/1442] ceph: fix endianness of getattr mask in ceph_d_revalidate commit 1097680d759918ce4a8705381c0ab2ed7bd60cf1 upstream. sparse says: fs/ceph/dir.c:1248:50: warning: incorrect type in assignment (different base types) fs/ceph/dir.c:1248:50: expected restricted __le32 [usertype] mask fs/ceph/dir.c:1248:50: got int [signed] [assigned] mask Fixes: 200fd27c8fa2 ("ceph: use lookup request to revalidate dentry") Signed-off-by: Jeff Layton Reviewed-by: Sage Weil Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/dir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index a594c7879cc2..1afa11191000 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1255,7 +1255,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) struct ceph_mds_client *mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; struct ceph_mds_request *req; - int op, mask, err; + int op, err; + u32 mask; if (flags & LOOKUP_RCU) return -ECHILD; @@ -1270,7 +1271,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; - req->r_args.getattr.mask = mask; + req->r_args.getattr.mask = cpu_to_le32(mask); err = ceph_mdsc_do_request(mdsc, NULL, req); switch (err) { -- GitLab From 6e9fa67c58ccd59df9cc21a8e9a4c4f9aea15fa8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 12 Jan 2017 14:42:39 -0500 Subject: [PATCH 0813/1442] ceph: fix endianness bug in frag_tree_split_cmp commit fe2ed42517533068ac03eed5630fffafff27eacf upstream. sparse says: fs/ceph/inode.c:308:36: warning: incorrect type in argument 1 (different base types) fs/ceph/inode.c:308:36: expected unsigned int [unsigned] [usertype] a fs/ceph/inode.c:308:36: got restricted __le32 [usertype] frag fs/ceph/inode.c:308:46: warning: incorrect type in argument 2 (different base types) fs/ceph/inode.c:308:46: expected unsigned int [unsigned] [usertype] b fs/ceph/inode.c:308:46: got restricted __le32 [usertype] frag We need to convert these values to host-endian before calling the comparator. Fixes: a407846ef7c6 ("ceph: don't assume frag tree splits in mds reply are sorted") Signed-off-by: Jeff Layton Reviewed-by: Sage Weil Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ef4d04647325..12f2252f6c98 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -305,7 +305,8 @@ static int frag_tree_split_cmp(const void *l, const void *r) { struct ceph_frag_tree_split *ls = (struct ceph_frag_tree_split*)l; struct ceph_frag_tree_split *rs = (struct ceph_frag_tree_split*)r; - return ceph_frag_compare(ls->frag, rs->frag); + return ceph_frag_compare(le32_to_cpu(ls->frag), + le32_to_cpu(rs->frag)); } static bool is_frag_child(u32 f, struct ceph_inode_frag *frag) -- GitLab From a193c72475960dfb98a37d0fc7739a10ccd95111 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Jan 2017 19:16:46 +0100 Subject: [PATCH 0814/1442] libceph: make sure ceph_aes_crypt() IV is aligned commit 124f930b8cbc4ac11236e6eb1c5f008318864588 upstream. ... otherwise the crypto stack will align it for us with a GFP_ATOMIC allocation and a memcpy() -- see skcipher_walk_first(). Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- net/ceph/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 38936e1fd644..c48510385900 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -164,7 +164,7 @@ static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, SKCIPHER_REQUEST_ON_STACK(req, tfm); struct sg_table sgt; struct scatterlist prealloc_sg; - char iv[AES_BLOCK_SIZE]; + char iv[AES_BLOCK_SIZE] __aligned(8); int pad_byte = AES_BLOCK_SIZE - (in_len & (AES_BLOCK_SIZE - 1)); int crypt_len = encrypt ? in_len + pad_byte : in_len; int ret; -- GitLab From 8ade1c2b453019997e9cd367790366a4366491e8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Nov 2016 10:52:16 -0500 Subject: [PATCH 0815/1442] xprtrdma: Make FRWR send queue entry accounting more accurate commit 8d38de65644d900199f035277aa5f3da4aa9fc17 upstream. Verbs providers may perform house-keeping on the Send Queue during each signaled send completion. It is necessary therefore for a verbs consumer (like xprtrdma) to occasionally force a signaled send completion if it runs unsignaled most of the time. xprtrdma does not require signaled completions for Send or FastReg Work Requests, but does signal some LocalInv Work Requests. To ensure that Send Queue house-keeping can run before the Send Queue is more than half-consumed, xprtrdma forces a signaled completion on occasion by counting the number of Send Queue Entries it consumes. It currently does this by counting each ib_post_send as one Entry. Commit c9918ff56dfb ("xprtrdma: Add ro_unmap_sync method for FRWR") introduced the ability for frwr_op_unmap_sync to post more than one Work Request with a single post_send. Thus the underlying assumption of one Send Queue Entry per ib_post_send is no longer true. Also, FastReg Work Requests are currently never signaled. They should be signaled once in a while, just as Send is, to keep the accounting of consumed SQEs accurate. While we're here, convert the CQCOUNT macros to the currently preferred kernel coding style, which is inline functions. Fixes: c9918ff56dfb ("xprtrdma: Add ro_unmap_sync method for FRWR") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/frwr_ops.c | 13 ++++++++++--- net/sunrpc/xprtrdma/verbs.c | 10 ++-------- net/sunrpc/xprtrdma/xprt_rdma.h | 20 ++++++++++++++++++-- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 26b26beef2d4..adbf52c6df83 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -421,7 +421,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; - DECR_CQCOUNT(&r_xprt->rx_ep); + rpcrdma_set_signaled(&r_xprt->rx_ep, ®_wr->wr); rc = ib_post_send(ia->ri_id->qp, ®_wr->wr, &bad_wr); if (rc) goto out_senderr; @@ -486,7 +486,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw, *tmp; struct rpcrdma_frmr *f; - int rc; + int count, rc; dprintk("RPC: %s: req %p\n", __func__, req); @@ -496,6 +496,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * a single ib_post_send() call. */ f = NULL; + count = 0; invalidate_wrs = pos = prev = NULL; list_for_each_entry(mw, &req->rl_registered, mw_list) { if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) && @@ -505,6 +506,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) } pos = __frwr_prepare_linv_wr(mw); + count++; if (!invalidate_wrs) invalidate_wrs = pos; @@ -523,7 +525,12 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) f->fr_invwr.send_flags = IB_SEND_SIGNALED; f->fr_cqe.done = frwr_wc_localinv_wake; reinit_completion(&f->fr_linv_done); - INIT_CQCOUNT(&r_xprt->rx_ep); + + /* Initialize CQ count, since there is always a signaled + * WR being posted here. The new cqcount depends on how + * many SQEs are about to be consumed. + */ + rpcrdma_init_cqcount(&r_xprt->rx_ep, count); /* Transport disconnect drains the receive CQ before it * replaces the QP. The RPC reply handler won't call us diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index ec74289af7ec..451f5f27d8af 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -532,7 +532,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; /* always signal? */ - INIT_CQCOUNT(ep); + rpcrdma_init_cqcount(ep, 0); init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); @@ -1311,13 +1311,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, dprintk("RPC: %s: posting %d s/g entries\n", __func__, send_wr->num_sge); - if (DECR_CQCOUNT(ep) > 0) - send_wr->send_flags = 0; - else { /* Provider must take a send completion every now and then */ - INIT_CQCOUNT(ep); - send_wr->send_flags = IB_SEND_SIGNALED; - } - + rpcrdma_set_signaled(ep, send_wr); rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); if (rc) goto out_postsend_err; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 6e1bba358203..f6ae1b22da47 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -95,8 +95,24 @@ struct rpcrdma_ep { struct delayed_work rep_connect_worker; }; -#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) -#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) +static inline void +rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count) +{ + atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count); +} + +/* To update send queue accounting, provider must take a + * send completion every now and then. + */ +static inline void +rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr) +{ + send_wr->send_flags = 0; + if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) { + rpcrdma_init_cqcount(ep, 0); + send_wr->send_flags = IB_SEND_SIGNALED; + } +} /* Pre-allocate extra Work Requests for handling backward receives * and sends. This is a fixed value because the Work Queues are -- GitLab From d34b6684e60f25ae1e5c189b00f42ed65c5cdbc0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Nov 2016 10:53:13 -0500 Subject: [PATCH 0816/1442] xprtrdma: Squelch "max send, max recv" messages at connect time commit 6d6bf72de914059b304f7b99530a7856e5c846aa upstream. Clean up: This message was intended to be a dprintk, as it is on the server-side. Fixes: 87cfb9a0c85c ('xprtrdma: Client-side support for ...') Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 451f5f27d8af..8da7f6a4dfc3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -223,8 +223,8 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, cdata->inline_rsize = rsize; if (wsize < cdata->inline_wsize) cdata->inline_wsize = wsize; - pr_info("rpcrdma: max send %u, max recv %u\n", - cdata->inline_wsize, cdata->inline_rsize); + dprintk("RPC: %s: max send %u, max recv %u\n", + __func__, cdata->inline_wsize, cdata->inline_rsize); rpcrdma_set_max_header_sizes(r_xprt); } -- GitLab From 962957889d74c161d797dbf418d8d7ded241aaa1 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Wed, 18 Jan 2017 09:09:25 +0200 Subject: [PATCH 0817/1442] arm64: mm: avoid name clash in __page_to_voff() commit 1c8a946bf3754a59cba1fc373949a8114bfe5aaa upstream. The arm64 __page_to_voff() macro takes a parameter called 'page', and also refers to 'struct page'. Thus, if the value passed in is not called 'page', we'll refer to the wrong struct name (which might not exist). Fixes: 3fa72fe9c614 ("arm64: mm: fix __page_to_voff definition") Acked-by: Mark Rutland Suggested-by: Volodymyr Babchuk Signed-off-by: Oleksandr Andrushchenko Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b71086d25195..53211a0acf0f 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -217,7 +217,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) -#define __page_to_voff(page) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) +#define __page_to_voff(kaddr) (((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) #define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) -- GitLab From 776c2b2d165dc64397f3f9b39839920a08578e8f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jan 2017 12:46:33 +0100 Subject: [PATCH 0818/1442] arm64: Fix swiotlb fallback allocation commit 524dabe1c68e0bca25ce7b108099e5d89472a101 upstream. Commit b67a8b29df introduced logic to skip swiotlb allocation when all memory is DMA accessible anyway. While this is a great idea, __dma_alloc still calls swiotlb code unconditionally to allocate memory when there is no CMA memory available. The swiotlb code is called to ensure that we at least try get_free_pages(). Without initialization, swiotlb allocation code tries to access io_tlb_list which is NULL. That results in a stack trace like this: Unable to handle kernel NULL pointer dereference at virtual address 00000000 [...] [] swiotlb_tbl_map_single+0xd0/0x2b0 [] swiotlb_alloc_coherent+0x10c/0x198 [] __dma_alloc+0x68/0x1a8 [] drm_gem_cma_create+0x98/0x108 [drm] [] drm_fbdev_cma_create_with_funcs+0xbc/0x368 [drm_kms_helper] [] drm_fbdev_cma_create+0x2c/0x40 [drm_kms_helper] [] drm_fb_helper_initial_config+0x238/0x410 [drm_kms_helper] [] drm_fbdev_cma_init_with_funcs+0x98/0x160 [drm_kms_helper] [] drm_fbdev_cma_init+0x40/0x58 [drm_kms_helper] [] vc4_kms_load+0x90/0xf0 [vc4] [] vc4_drm_bind+0xec/0x168 [vc4] [...] Thankfully swiotlb code just learned how to not do allocations with the FORCE_NO option. This patch configures the swiotlb code to use that if we decide not to initialize the swiotlb framework. Fixes: b67a8b29df ("arm64: mm: only initialize swiotlb when necessary") Signed-off-by: Alexander Graf CC: Jisheng Zhang CC: Geert Uytterhoeven CC: Konrad Rzeszutek Wilk Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 212c4d1e2f26..59bd3d47647b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -403,6 +403,8 @@ void __init mem_init(void) { if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); + else + swiotlb_force = SWIOTLB_NO_FORCE; set_max_mapnr(pfn_to_page(max_pfn) - mem_map); -- GitLab From 1fd1e6cd63143cf5d198a536d875dfc88ce179bc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Dec 2016 14:28:41 +0100 Subject: [PATCH 0819/1442] swiotlb: Convert swiotlb_force from int to enum commit ae7871be189cb41184f1e05742b4a99e2c59774d upstream. Convert the flag swiotlb_force from an int to an enum, to prepare for the advent of more possible values. Suggested-by: Konrad Rzeszutek Wilk Signed-off-by: Geert Uytterhoeven Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/dma-mapping.c | 3 ++- arch/arm64/mm/init.c | 3 ++- arch/x86/kernel/pci-swiotlb.c | 2 +- arch/x86/xen/pci-swiotlb-xen.c | 2 +- drivers/xen/swiotlb-xen.c | 4 ++-- include/linux/swiotlb.h | 7 ++++++- include/trace/events/swiotlb.h | 16 +++++++++------- lib/swiotlb.c | 8 ++++---- 8 files changed, 27 insertions(+), 18 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3f74d0d98de6..02265a589ef5 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -524,7 +524,8 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { - if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb = 1; return atomic_pool_init(); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 59bd3d47647b..380ebe705093 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -401,7 +401,8 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index b47edb8f5256..8da13d4e77cc 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -70,7 +70,7 @@ int __init pci_swiotlb_detect_override(void) { int use_swiotlb = swiotlb | swiotlb_force; - if (swiotlb_force) + if (swiotlb_force == SWIOTLB_FORCE) swiotlb = 1; return use_swiotlb; diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 0e98e5d241d0..5f8b4b0302b6 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -49,7 +49,7 @@ int __init pci_xen_swiotlb_detect(void) * activate this IOMMU. If running as PV privileged, activate it * irregardless. */ - if ((xen_initial_domain() || swiotlb || swiotlb_force)) + if (xen_initial_domain() || swiotlb || swiotlb_force == SWIOTLB_FORCE) xen_swiotlb = 1; /* If we are running under Xen, we MUST disable the native SWIOTLB. diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 87e6035c9e81..8e7a3d646531 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -392,7 +392,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (dma_capable(dev, dev_addr, size) && !range_straddles_page_boundary(phys, size) && !xen_arch_need_swiotlb(dev, phys, dev_addr) && - !swiotlb_force) { + (swiotlb_force != SWIOTLB_FORCE)) { /* we are not interested in the dma_addr returned by * xen_dma_map_page, only in the potential cache flushes executed * by the function. */ @@ -549,7 +549,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = xen_phys_to_bus(paddr); - if (swiotlb_force || + if (swiotlb_force == SWIOTLB_FORCE || xen_arch_need_swiotlb(hwdev, paddr, dev_addr) || !dma_capable(hwdev, dev_addr, sg->length) || range_straddles_page_boundary(paddr, sg->length)) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5f81f8a187f2..746ecebbd4ca 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -9,7 +9,12 @@ struct device; struct page; struct scatterlist; -extern int swiotlb_force; +enum swiotlb_force { + SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. */ + SWIOTLB_FORCE, /* swiotlb=force */ +}; + +extern enum swiotlb_force swiotlb_force; /* * Maximum allowable number of contiguous slabs to map, diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h index 7ea4c5e7c448..5e2e30a7efce 100644 --- a/include/trace/events/swiotlb.h +++ b/include/trace/events/swiotlb.h @@ -11,16 +11,16 @@ TRACE_EVENT(swiotlb_bounced, TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size, - int swiotlb_force), + enum swiotlb_force swiotlb_force), TP_ARGS(dev, dev_addr, size, swiotlb_force), TP_STRUCT__entry( - __string( dev_name, dev_name(dev) ) - __field( u64, dma_mask ) - __field( dma_addr_t, dev_addr ) - __field( size_t, size ) - __field( int, swiotlb_force ) + __string( dev_name, dev_name(dev) ) + __field( u64, dma_mask ) + __field( dma_addr_t, dev_addr ) + __field( size_t, size ) + __field( enum swiotlb_force, swiotlb_force ) ), TP_fast_assign( @@ -37,7 +37,9 @@ TRACE_EVENT(swiotlb_bounced, __entry->dma_mask, (unsigned long long)__entry->dev_addr, __entry->size, - __entry->swiotlb_force ? "swiotlb_force" : "" ) + __print_symbolic(__entry->swiotlb_force, + { SWIOTLB_NORMAL, "NORMAL" }, + { SWIOTLB_FORCE, "FORCE" })) ); #endif /* _TRACE_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 22e13a0e19d7..68e8f49c7e06 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -53,7 +53,7 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -int swiotlb_force; +enum swiotlb_force swiotlb_force; /* * Used to do a quick range check in swiotlb_tbl_unmap_single and @@ -107,7 +107,7 @@ setup_io_tlb_npages(char *str) if (*str == ',') ++str; if (!strcmp(str, "force")) - swiotlb_force = 1; + swiotlb_force = SWIOTLB_FORCE; return 0; } @@ -749,7 +749,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (dma_capable(dev, dev_addr, size) && !swiotlb_force) + if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) return dev_addr; trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); @@ -888,7 +888,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - if (swiotlb_force || + if (swiotlb_force == SWIOTLB_FORCE || !dma_capable(hwdev, dev_addr, sg->length)) { phys_addr_t map = map_single(hwdev, sg_phys(sg), sg->length, dir); -- GitLab From 41c6b3e8989e79772a50429d92cf91959bcce96d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Dec 2016 14:28:42 +0100 Subject: [PATCH 0820/1442] swiotlb: Add swiotlb=noforce debug option commit fff5d99225107f5f13fe4a9805adc2a1c4b5fb00 upstream. On architectures like arm64, swiotlb is tied intimately to the core architecture DMA support. In addition, ZONE_DMA cannot be disabled. To aid debugging and catch devices not supporting DMA to memory outside the 32-bit address space, add a kernel command line option "swiotlb=noforce", which disables the use of bounce buffers. If specified, trying to map memory that cannot be used with DMA will fail, and a rate-limited warning will be printed. Note that io_tlb_nslabs is set to 1, which is the minimal supported value. Signed-off-by: Geert Uytterhoeven Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 3 ++- include/linux/swiotlb.h | 1 + include/trace/events/swiotlb.h | 3 ++- lib/swiotlb.c | 18 ++++++++++++++++-- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 37babf91f2cb..922dec8fa07e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3998,10 +3998,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. it if 0 is given (See Documentation/cgroup-v1/memory.txt) swiotlb= [ARM,IA-64,PPC,MIPS,X86] - Format: { | force } + Format: { | force | noforce } -- Number of I/O TLB slabs force -- force using of bounce buffers even if they wouldn't be automatically used by the kernel + noforce -- Never use bounce buffers (for debugging) switches= [HW,M68k] diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 746ecebbd4ca..d2613536fd03 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -12,6 +12,7 @@ struct scatterlist; enum swiotlb_force { SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. */ SWIOTLB_FORCE, /* swiotlb=force */ + SWIOTLB_NO_FORCE, /* swiotlb=noforce */ }; extern enum swiotlb_force swiotlb_force; diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h index 5e2e30a7efce..288c0c54a2b4 100644 --- a/include/trace/events/swiotlb.h +++ b/include/trace/events/swiotlb.h @@ -39,7 +39,8 @@ TRACE_EVENT(swiotlb_bounced, __entry->size, __print_symbolic(__entry->swiotlb_force, { SWIOTLB_NORMAL, "NORMAL" }, - { SWIOTLB_FORCE, "FORCE" })) + { SWIOTLB_FORCE, "FORCE" }, + { SWIOTLB_NO_FORCE, "NO_FORCE" })) ); #endif /* _TRACE_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 68e8f49c7e06..ad1d2962d129 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -106,8 +106,12 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) + if (!strcmp(str, "force")) { swiotlb_force = SWIOTLB_FORCE; + } else if (!strcmp(str, "noforce")) { + swiotlb_force = SWIOTLB_NO_FORCE; + io_tlb_nslabs = 1; + } return 0; } @@ -541,8 +545,15 @@ static phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, enum dma_data_direction dir) { - dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); + dma_addr_t start_dma_addr; + + if (swiotlb_force == SWIOTLB_NO_FORCE) { + dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", + &phys); + return SWIOTLB_MAP_ERROR; + } + start_dma_addr = phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); } @@ -707,6 +718,9 @@ static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, int do_panic) { + if (swiotlb_force == SWIOTLB_NO_FORCE) + return; + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. -- GitLab From a07a122ad2a2e03209155fe907bf8b1e6ae4c915 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 9 Jan 2017 16:33:36 -0500 Subject: [PATCH 0821/1442] scsi: ses: Fix SAS device detection in enclosure commit 9373eba6cfae48911b977d14323032cd5d161aae upstream. The call to scsi_is_sas_rphy() needs to be made on the SAS end_device, not on the SCSI device. Fixes: 835831c57e9b ("ses: use scsi_is_sas_rphy instead of is_sas_attached") Signed-off-by: Ewan D. Milne Reviewed-by: Johannes Thumshirn Reviewed-by: James Bottomley Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ses.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 8c9a35c91705..50adabbb5808 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -587,7 +587,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev, ses_enclosure_data_process(edev, to_scsi_device(edev->edev.parent), 0); - if (scsi_is_sas_rphy(&sdev->sdev_gendev)) + if (scsi_is_sas_rphy(sdev->sdev_target->dev.parent)) efd.addr = sas_get_address(sdev); if (efd.addr) { -- GitLab From 16236802bfecb1082144a48b7d6fa60997824662 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 1 Jan 2017 09:39:24 -0800 Subject: [PATCH 0822/1442] scsi: mpt3sas: fix hang on ata passthrough commands commit ffb58456589443ca572221fabbdef3db8483a779 upstream. mpt3sas has a firmware failure where it can only handle one pass through ATA command at a time. If another comes in, contrary to the SAT standard, it will hang until the first one completes (causing long commands like secure erase to timeout). The original fix was to block the device when an ATA command came in, but this caused a regression with commit 669f044170d8933c3d66d231b69ea97cb8447338 Author: Bart Van Assche Date: Tue Nov 22 16:17:13 2016 -0800 scsi: srp_transport: Move queuecommand() wait code to SCSI core So fix the original fix of the secure erase timeout by properly returning SAM_STAT_BUSY like the SAT recommends. The original patch also had a concurrency problem since scsih_qcmd is lockless at that point (this is fixed by using atomic bitops to set and test the flag). [mkp: addressed feedback wrt. test_bit and fixed whitespace] Fixes: 18f6084a989ba1b (mpt3sas: Fix secure erase premature termination) Signed-off-by: James Bottomley Acked-by: Sreekanth Reddy Reviewed-by: Christoph Hellwig Reported-by: Ingo Molnar Tested-by: Ingo Molnar Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_base.h | 12 +++++++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 40 ++++++++++++++++++---------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 3e71bc1b4a80..7008061c4b5b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -393,6 +393,7 @@ struct MPT3SAS_TARGET { * @eedp_enable: eedp support enable bit * @eedp_type: 0(type_1), 1(type_2), 2(type_3) * @eedp_block_length: block size + * @ata_command_pending: SATL passthrough outstanding for device */ struct MPT3SAS_DEVICE { struct MPT3SAS_TARGET *sas_target; @@ -402,6 +403,17 @@ struct MPT3SAS_DEVICE { u8 block; u8 tlr_snoop_check; u8 ignore_delay_remove; + /* + * Bug workaround for SATL handling: the mpt2/3sas firmware + * doesn't return BUSY or TASK_SET_FULL for subsequent + * commands while a SATL pass through is in operation as the + * spec requires, it simply does nothing with them until the + * pass through completes, causing them possibly to timeout if + * the passthrough is a long executing command (like format or + * secure erase). This variable allows us to do the right + * thing while a SATL command is pending. + */ + unsigned long ata_command_pending; }; #define MPT3_CMD_NOT_USED 0x8000 /* free */ diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 1c4744e78173..f84a6087cebd 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3885,9 +3885,18 @@ _scsih_temp_threshold_events(struct MPT3SAS_ADAPTER *ioc, } } -static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd) +static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) { - return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16); + struct MPT3SAS_DEVICE *priv = scmd->device->hostdata; + + if (scmd->cmnd[0] != ATA_12 && scmd->cmnd[0] != ATA_16) + return 0; + + if (pending) + return test_and_set_bit(0, &priv->ata_command_pending); + + clear_bit(0, &priv->ata_command_pending); + return 0; } /** @@ -3911,9 +3920,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc) if (!scmd) continue; count++; - if (ata_12_16_cmd(scmd)) - scsi_internal_device_unblock(scmd->device, - SDEV_RUNNING); + _scsih_set_satl_pending(scmd, false); mpt3sas_base_free_smid(ioc, smid); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery) @@ -4044,13 +4051,6 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (ioc->logging_level & MPT_DEBUG_SCSI) scsi_print_command(scmd); - /* - * Lock the device for any subsequent command until command is - * done. - */ - if (ata_12_16_cmd(scmd)) - scsi_internal_device_block(scmd->device); - sas_device_priv_data = scmd->device->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { scmd->result = DID_NO_CONNECT << 16; @@ -4064,6 +4064,19 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) return 0; } + /* + * Bug work around for firmware SATL handling. The loop + * is based on atomic operations and ensures consistency + * since we're lockless at this point + */ + do { + if (test_bit(0, &sas_device_priv_data->ata_command_pending)) { + scmd->result = SAM_STAT_BUSY; + scmd->scsi_done(scmd); + return 0; + } + } while (_scsih_set_satl_pending(scmd, true)); + sas_target_priv_data = sas_device_priv_data->sas_target; /* invalid device handle */ @@ -4626,8 +4639,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) if (scmd == NULL) return 1; - if (ata_12_16_cmd(scmd)) - scsi_internal_device_unblock(scmd->device, SDEV_RUNNING); + _scsih_set_satl_pending(scmd, false); mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); -- GitLab From 7959835473173638527f3d294fcccb1b1697bd92 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 28 Dec 2016 20:52:36 +0900 Subject: [PATCH 0823/1442] PM / devfreq: exynos-bus: Fix the wrong return value commit 32dd7731699765f21dbe6df9020e613d4ed73fc3 upstream. This patch fixes the wrong return value. If devfreq driver requires the wrong and non-available governor, it is fail. So, this patch returns the error insead of -EPROBE_DEFER. Fixes: 403e0689d2a9 (PM / devfreq: exynos: Add support of bus frequency of sub-blocks using passive governor) Signed-off-by: Chanwoo Choi Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/exynos-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 29866f7e6d7e..1b21bb60e797 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -498,7 +498,7 @@ static int exynos_bus_probe(struct platform_device *pdev) if (IS_ERR(bus->devfreq)) { dev_err(dev, "failed to add devfreq dev with passive governor\n"); - ret = -EPROBE_DEFER; + ret = PTR_ERR(bus->devfreq); goto err; } -- GitLab From ebdfcaa14eef97ae0711823524fd8c8865c05933 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 28 Dec 2016 20:52:35 +0900 Subject: [PATCH 0824/1442] PM / devfreq: Fix the bug of devfreq_add_device when governor is NULL commit 73613b16cb5c5d5a659fc8832eff99eead3f9afb upstream. This patch fixes the bug of devfreq_add_device(). The devfreq device must have the default governor. If find_devfreq_governor() returns error, devfreq_add_device() fail to add the devfreq instance. Fixes: 1b5c1be2c88e (PM / devfreq: map devfreq drivers to governor using name) Signed-off-by: Chanwoo Choi Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/devfreq/devfreq.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index bf3ea7603a58..712592cef1a2 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -593,11 +593,16 @@ struct devfreq *devfreq_add_device(struct device *dev, list_add(&devfreq->node, &devfreq_list); governor = find_devfreq_governor(devfreq->governor_name); - if (!IS_ERR(governor)) - devfreq->governor = governor; - if (devfreq->governor) - err = devfreq->governor->event_handler(devfreq, - DEVFREQ_GOV_START, NULL); + if (IS_ERR(governor)) { + dev_err(dev, "%s: Unable to find governor for the device\n", + __func__); + err = PTR_ERR(governor); + goto err_init; + } + + devfreq->governor = governor; + err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START, + NULL); if (err) { dev_err(dev, "%s: Unable to start governor for the device\n", __func__); -- GitLab From e55e6c026b7c49c4dcc33f4ebc330e660ecf3963 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Oct 2016 11:06:47 +0300 Subject: [PATCH 0825/1442] mtd: spi-nor: Off by one in cqspi_setup_flash() commit 193e87143c290ec16838f5368adc0e0bc94eb931 upstream. There are CQSPI_MAX_CHIPSELECT elements in the ->f_pdata array so the > should be >=. Fixes: 140623410536 ('mtd: spi-nor: Add driver for Cadence Quad SPI Flash Controller') Signed-off-by: Dan Carpenter Reviewed-by: Marek Vasut Signed-off-by: Cyrille Pitchen Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/cadence-quadspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index d403ba7b8f43..944863be0320 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -1082,7 +1082,7 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np) goto err; } - if (cs > CQSPI_MAX_CHIPSELECT) { + if (cs >= CQSPI_MAX_CHIPSELECT) { dev_err(dev, "Chip select %d out of range.\n", cs); goto err; } -- GitLab From 0f3418442d7bc8050426c733022fd111d8ae1375 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Oct 2016 11:30:39 +0300 Subject: [PATCH 0826/1442] mtd: spi-nor: Fix some error codes in cqspi_setup_flash() commit 10ad1d754a434c024ce7eeb1cec9f69fd3ecc863 upstream. We return success or possibly uninitialized values on these error paths instead of proper error codes. Fixes: 140623410536 ("mtd: spi-nor: Add driver for Cadence Quad SPI Flash Controller") Signed-off-by: Dan Carpenter Reviewed-by: Marek Vasut Reviewed-by: Moritz Fischer Signed-off-by: Cyrille Pitchen Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/cadence-quadspi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index 944863be0320..d489fbd07c12 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -1077,12 +1077,14 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np) /* Get flash device data */ for_each_available_child_of_node(dev->of_node, np) { - if (of_property_read_u32(np, "reg", &cs)) { + ret = of_property_read_u32(np, "reg", &cs); + if (ret) { dev_err(dev, "Couldn't determine chip select.\n"); goto err; } if (cs >= CQSPI_MAX_CHIPSELECT) { + ret = -EINVAL; dev_err(dev, "Chip select %d out of range.\n", cs); goto err; } -- GitLab From fcdab6ca9c31bc4d2abfa931d9ebde986515dedd Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Thu, 15 Dec 2016 15:49:56 +0100 Subject: [PATCH 0827/1442] rpmsg: virtio_rpmsg_bus: fix channel creation commit 63447646ac657fde00bb658ce21a3431940ae0ad upstream. Since commit 4dffed5b3ac796b ("rpmsg: Name rpmsg devices based on channel id"), it is no more possible for a firmware to register twice a service (on different endpoints). rpmsg_register_device function is failing when calling device_add for the second time as second device has the same name as first one already register. It is because name is based only on service name and so is not more unique. Previously name was unique thanks to the use of rpmsg_dev_index. This patch adds destination and source endpoint numbers device name to create an unique identifier. Fixes: 4dffed5b3ac7 ("rpmsg: Name rpmsg devices based on channel id") Acked-by: Peter Griffin Signed-off-by: Loic Pallardy [bjorn: flipped name and address in device name] Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/rpmsg/rpmsg_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index b6ea9ffa7381..e0a629eaceab 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -411,8 +411,8 @@ int rpmsg_register_device(struct rpmsg_device *rpdev) struct device *dev = &rpdev->dev; int ret; - dev_set_name(&rpdev->dev, "%s:%s", - dev_name(dev->parent), rpdev->id.name); + dev_set_name(&rpdev->dev, "%s.%s.%d.%d", dev_name(dev->parent), + rpdev->id.name, rpdev->src, rpdev->dst); rpdev->dev.bus = &rpmsg_bus; rpdev->dev.release = rpmsg_release_device; -- GitLab From 2a306053176853c785d48dfe27da8d1c544723b5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 Jul 2016 07:18:14 -0300 Subject: [PATCH 0828/1442] blackfin: check devm_pinctrl_get() for errors commit c9205e18b41a6ef5ad73e1c4b86a78b2ea3ccb9b upstream. devm_pinctrl_get() can fail so we should check for that. Fixes: 0a6824bc10de ('[media] v4l2: blackfin: select proper pinctrl state in ppi_set_params if CONFIG_PINCTRL is enabled') Signed-off-by: Dan Carpenter Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/blackfin/ppi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/blackfin/ppi.c b/drivers/media/platform/blackfin/ppi.c index cff63e511e6d..b8f3d9fa66e9 100644 --- a/drivers/media/platform/blackfin/ppi.c +++ b/drivers/media/platform/blackfin/ppi.c @@ -214,6 +214,8 @@ static int ppi_set_params(struct ppi_if *ppi, struct ppi_params *params) if (params->dlen > 24 || params->dlen <= 0) return -EINVAL; pctrl = devm_pinctrl_get(ppi->dev); + if (IS_ERR(pctrl)) + return PTR_ERR(pctrl); pstate = pinctrl_lookup_state(pctrl, pin_state[(params->dlen + 7) / 8 - 1]); if (pinctrl_select_state(pctrl, pstate)) -- GitLab From cec5ef6ac5202c79d0a95e65d41511a770e8607b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Sep 2016 09:46:30 -0300 Subject: [PATCH 0829/1442] platform: pxa_camera: add VIDEO_V4L2 dependency commit ff681022c6639c194fbb6893c50ace9e52a44788 upstream. Moving the pxa_camera driver from soc_camera lots the implied VIDEO_V4L2 Kconfig dependency, and building the driver without V4L2 results in a kernel that cannot link: drivers/media/platform/pxa_camera.o: In function `pxa_camera_remove': pxa_camera.c:(.text.pxa_camera_remove+0x10): undefined reference to `v4l2_clk_unregister' pxa_camera.c:(.text.pxa_camera_remove+0x18): undefined reference to `v4l2_device_unregister' drivers/media/platform/pxa_camera.o: In function `pxa_camera_probe': pxa_camera.c:(.text.pxa_camera_probe+0x458): undefined reference to `v4l2_of_parse_endpoint' drivers/media/v4l2-core/videobuf2-core.o: In function `__enqueue_in_driver': drivers/media/v4l2-core/videobuf2-core.o: In function `vb2_core_streamon': videobuf2-core.c:(.text.vb2_core_streamon+0x1b4): undefined reference to `v4l_vb2q_enable_media_source' drivers/media/v4l2-core/videobuf2-v4l2.o: In function `vb2_ioctl_reqbufs': videobuf2-v4l2.c:(.text.vb2_ioctl_reqbufs+0xc): undefined reference to `video_devdata' This adds back an explicit dependency. Fixes: 3050b9985024 ("[media] media: platform: pxa_camera: move pxa_camera out of soc_camera") Signed-off-by: Arnd Bergmann Acked-by: Robert Jarzmik Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index ce4a96fccc43..5ff803efdc03 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -93,7 +93,7 @@ config VIDEO_OMAP3_DEBUG config VIDEO_PXA27x tristate "PXA27x Quick Capture Interface driver" - depends on VIDEO_DEV && HAS_DMA + depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA depends on PXA27x || COMPILE_TEST select VIDEOBUF2_DMA_SG select SG_SPLIT -- GitLab From 278997a8e002ba1cedf132e86b12acf00137c28a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 10:09:39 -0300 Subject: [PATCH 0830/1442] gs1662: drop kfree for memory allocated with devm_kzalloc commit df94121f02ecce435d6b5277071eb94b764caa89 upstream. It's not necessary to free memory allocated with devm_kzalloc and using kfree leads to a double free. Fixes: 7aae6e2df127 ("[media] Add GS1662 driver, a video serializer") Signed-off-by: Wei Yongjun Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/spi/gs1662.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/spi/gs1662.c b/drivers/media/spi/gs1662.c index d76f36233f43..5143a90219c0 100644 --- a/drivers/media/spi/gs1662.c +++ b/drivers/media/spi/gs1662.c @@ -453,10 +453,9 @@ static int gs_probe(struct spi_device *spi) static int gs_remove(struct spi_device *spi) { struct v4l2_subdev *sd = spi_get_drvdata(spi); - struct gs *gs = to_gs(sd); v4l2_device_unregister_subdev(sd); - kfree(gs); + return 0; } -- GitLab From b9dc16170dbf5385a273019175c0576d3224cd36 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sat, 10 Sep 2016 13:59:49 -0300 Subject: [PATCH 0831/1442] ite-cir: initialize use_demodulator before using it commit 7ec03e60ef81c19b5d3a46dd070ee966774b860f upstream. Function ite_set_carrier_params() uses variable use_demodulator after having initialized it to false in some if branches, but this variable is never set to true otherwise. This bug has been found using clang -Wsometimes-uninitialized warning flag. Fixes: 620a32bba4a2 ("[media] rc: New rc-based ite-cir driver for several ITE CIRs") Signed-off-by: Nicolas Iooss Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/ite-cir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index 0f301903aa6f..63165d324fff 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -263,6 +263,8 @@ static void ite_set_carrier_params(struct ite_dev *dev) if (allowance > ITE_RXDCR_MAX) allowance = ITE_RXDCR_MAX; + + use_demodulator = true; } } -- GitLab From dfe8e5730fa1a91129c65dacf85783fa796d7e0a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2016 04:32:24 -0300 Subject: [PATCH 0832/1442] st-hva: fix some error handling in hva_hw_probe() commit 6b2bed891253c08e7f4c17dbd46b71fc87f22eef upstream. The devm_ioremap_resource() returns error pointers, never NULL. The platform_get_resource() returns NULL on error, never error pointers. The error code needs to be set, as well. The current code returns PTR_ERR(NULL) which is success. Fixes: 57b2c0628b60 ("[media] st-hva: multi-format video encoder V4L2 driver") Signed-off-by: Dan Carpenter Acked-by: Jean-Christophe Trotin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/sti/hva/hva-hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/sti/hva/hva-hw.c b/drivers/media/platform/sti/hva/hva-hw.c index d341d4994528..cf2a8d884536 100644 --- a/drivers/media/platform/sti/hva/hva-hw.c +++ b/drivers/media/platform/sti/hva/hva-hw.c @@ -305,16 +305,16 @@ int hva_hw_probe(struct platform_device *pdev, struct hva_dev *hva) /* get memory for registers */ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); hva->regs = devm_ioremap_resource(dev, regs); - if (IS_ERR_OR_NULL(hva->regs)) { + if (IS_ERR(hva->regs)) { dev_err(dev, "%s failed to get regs\n", HVA_PREFIX); return PTR_ERR(hva->regs); } /* get memory for esram */ esram = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (IS_ERR_OR_NULL(esram)) { + if (!esram) { dev_err(dev, "%s failed to get esram\n", HVA_PREFIX); - return PTR_ERR(esram); + return -ENODEV; } hva->esram_addr = esram->start; hva->esram_size = resource_size(esram); -- GitLab From d47e1e7c46fed13f6e9d60c98f8352c714db695e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 13 Oct 2016 11:39:04 -0300 Subject: [PATCH 0833/1442] s5p-cec: mark PM functions as __maybe_unused again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit eadf081146ec327d6fbbb6aff28e3d9aac329dc6 upstream. A bugfix removed the two callers of s5p_cec_runtime_suspend and s5p_cec_runtime_resume, leading to the return of a harmless warning that I had previously fixed in commit aee8937089b1 ("[media] s5p_cec: mark suspend/resume as __maybe_unused"): staging/media/s5p-cec/s5p_cec.c:234:12: error: ‘s5p_cec_runtime_suspend’ defined but not used [-Werror=unused-function] staging/media/s5p-cec/s5p_cec.c:242:12: error: ‘s5p_cec_runtime_resume’ defined but not used [-Werror=unused-function] This adds the __maybe_unused annotations to the function that were not removed and that are now unused when CONFIG_PM is disabled. Fixes: 57b978ada073 ("[media] s5p-cec: fix system and runtime PM integration") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/s5p-cec/s5p_cec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/s5p-cec/s5p_cec.c b/drivers/staging/media/s5p-cec/s5p_cec.c index 1780a08b73c9..58d756231136 100644 --- a/drivers/staging/media/s5p-cec/s5p_cec.c +++ b/drivers/staging/media/s5p-cec/s5p_cec.c @@ -231,7 +231,7 @@ static int s5p_cec_remove(struct platform_device *pdev) return 0; } -static int s5p_cec_runtime_suspend(struct device *dev) +static int __maybe_unused s5p_cec_runtime_suspend(struct device *dev) { struct s5p_cec_dev *cec = dev_get_drvdata(dev); @@ -239,7 +239,7 @@ static int s5p_cec_runtime_suspend(struct device *dev) return 0; } -static int s5p_cec_runtime_resume(struct device *dev) +static int __maybe_unused s5p_cec_runtime_resume(struct device *dev) { struct s5p_cec_dev *cec = dev_get_drvdata(dev); int ret; -- GitLab From 3bef7578e05f8188743641da8bc7ddfdbb64d268 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 14 Nov 2016 12:09:26 -0200 Subject: [PATCH 0834/1442] s5p-mfc: Fix clock management in s5p_mfc_release() function commit c0026c7bfb95c250c3e34fde59f96ad72fd730d6 upstream. Clock control indirectly requires access to MFC device, so call it only if we are sure that the device exists in s5p_mfc_release function. s5p_mfc_remove() calls s5p_mfc_final_pm(), which releases all PM related resources, including clocks, so any call to clocks related functions is not valid after s5p_mfc_final_pm(). Fixes: d695c12 ("[media] media: s5p-mfc fix invalid memory access from s5p_mfc_release()") Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index 3436eda58855..27e7cf65c2a7 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -926,10 +926,11 @@ static int s5p_mfc_release(struct file *file) mfc_debug_enter(); if (dev) mutex_lock(&dev->mfc_mutex); - s5p_mfc_clock_on(); vb2_queue_release(&ctx->vq_src); vb2_queue_release(&ctx->vq_dst); if (dev) { + s5p_mfc_clock_on(); + /* Mark context as idle */ clear_work_bit_irqsave(ctx); /* @@ -951,9 +952,9 @@ static int s5p_mfc_release(struct file *file) if (s5p_mfc_power_off() < 0) mfc_err("Power off failed\n"); } + mfc_debug(2, "Shutting down clock\n"); + s5p_mfc_clock_off(); } - mfc_debug(2, "Shutting down clock\n"); - s5p_mfc_clock_off(); if (dev) dev->ctx[ctx->num] = NULL; s5p_mfc_dec_ctrls_delete(ctx); -- GitLab From 172270c743487ec24cc145949a77bd57136ffff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 11 Jan 2017 15:39:31 +0100 Subject: [PATCH 0835/1442] dmaengine: rcar-dmac: unmap slave resource when channel is freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3139dc8ded6f27552a248d23fe9f086e3027fa12 upstream. The slave mapping should be removed together with other channel resources when the channel is freed. If it's not unmapped it will hang around forever after the channel is freed. Fixes: 9f878603dbdb7db3 ("dmaengine: rcar-dmac: add iommu support for slave transfers") Reported-by: Laurent Pinchart Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/sh/rcar-dmac.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 2e441d0ccd79..4c357d475465 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -986,6 +986,7 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan) { struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); struct rcar_dmac *dmac = to_rcar_dmac(chan->device); + struct rcar_dmac_chan_map *map = &rchan->map; struct rcar_dmac_desc_page *page, *_page; struct rcar_dmac_desc *desc; LIST_HEAD(list); @@ -1019,6 +1020,13 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan) free_page((unsigned long)page); } + /* Remove slave mapping if present. */ + if (map->slave.xfer_size) { + dma_unmap_resource(chan->device->dev, map->addr, + map->slave.xfer_size, map->dir, 0); + map->slave.xfer_size = 0; + } + pm_runtime_put(chan->device->dev); } -- GitLab From d21814a8068ae5a006453f6c2c0b687a87249b24 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 16 Dec 2016 11:39:11 +0100 Subject: [PATCH 0836/1442] dmaengine: pl330: Fix runtime PM support for terminated transfers commit 5c9e6c2b2ba3ec3a442e2fb5b4286498f8b4dcb7 upstream. PL330 DMA engine driver is leaking a runtime reference after any terminated DMA transactions. This patch fixes this issue by tracking runtime PM state of the device and making additional call to pm_runtime_put() in terminate_all callback if needed. Fixes: ae43b3289186 ("ARM: 8202/1: dmaengine: pl330: Add runtime Power Management support v12") Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pl330.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 030fe05ed43b..9f3dbc8c63d2 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -448,6 +448,9 @@ struct dma_pl330_chan { /* for cyclic capability */ bool cyclic; + + /* for runtime pm tracking */ + bool active; }; struct pl330_dmac { @@ -2031,6 +2034,7 @@ static void pl330_tasklet(unsigned long data) _stop(pch->thread); spin_unlock(&pch->thread->dmac->lock); power_down = true; + pch->active = false; } else { /* Make sure the PL330 Channel thread is active */ spin_lock(&pch->thread->dmac->lock); @@ -2050,6 +2054,7 @@ static void pl330_tasklet(unsigned long data) desc->status = PREP; list_move_tail(&desc->node, &pch->work_list); if (power_down) { + pch->active = true; spin_lock(&pch->thread->dmac->lock); _start(pch->thread); spin_unlock(&pch->thread->dmac->lock); @@ -2164,6 +2169,7 @@ static int pl330_terminate_all(struct dma_chan *chan) unsigned long flags; struct pl330_dmac *pl330 = pch->dmac; LIST_HEAD(list); + bool power_down = false; pm_runtime_get_sync(pl330->ddma.dev); spin_lock_irqsave(&pch->lock, flags); @@ -2174,6 +2180,8 @@ static int pl330_terminate_all(struct dma_chan *chan) pch->thread->req[0].desc = NULL; pch->thread->req[1].desc = NULL; pch->thread->req_running = -1; + power_down = pch->active; + pch->active = false; /* Mark all desc done */ list_for_each_entry(desc, &pch->submitted_list, node) { @@ -2191,6 +2199,8 @@ static int pl330_terminate_all(struct dma_chan *chan) list_splice_tail_init(&pch->completed_list, &pl330->desc_pool); spin_unlock_irqrestore(&pch->lock, flags); pm_runtime_mark_last_busy(pl330->ddma.dev); + if (power_down) + pm_runtime_put_autosuspend(pl330->ddma.dev); pm_runtime_put_autosuspend(pl330->ddma.dev); return 0; @@ -2350,6 +2360,7 @@ static void pl330_issue_pending(struct dma_chan *chan) * updated on work_list emptiness status. */ WARN_ON(list_empty(&pch->submitted_list)); + pch->active = true; pm_runtime_get_sync(pch->dmac->ddma.dev); } list_splice_tail_init(&pch->submitted_list, &pch->work_list); -- GitLab From 97d5e2057564dc21aba1506ee30d869d691e3830 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Jan 2017 13:44:28 +0200 Subject: [PATCH 0837/1442] spi: pxa2xx: add missed break commit a2dd8af00ca7fff4972425a4a6b19dd1840dc807 upstream. The commit 7c7289a40425 ("spi: pxa2xx: Default thresholds to PXA configuration") while splitting up CE4100 code obviously missed a break condition in one chunk. Add it here. Looks like we have no active user of CE4100, though better to fix this later than never. Fixes: commit 7c7289a40425 ("spi: pxa2xx: Default thresholds to PXA configuration") Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index dd7b5b47291d..d6239fa718be 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1690,6 +1690,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) pxa2xx_spi_write(drv_data, SSCR1, tmp); tmp = SSCR0_SCR(2) | SSCR0_Motorola | SSCR0_DataSize(8); pxa2xx_spi_write(drv_data, SSCR0, tmp); + break; default: tmp = SSCR1_RxTresh(RX_THRESH_DFLT) | SSCR1_TxTresh(TX_THRESH_DFLT); -- GitLab From f37b7a3004bbf49d76ed170e720293752e74a06f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 12 Jan 2017 14:53:41 +0000 Subject: [PATCH 0838/1442] soc: ti: wkup_m3_ipc: Fix error return code in wkup_m3_ipc_probe() commit 36b29eb30ee0f6c99f06bea406c23a3fd4cbb80b upstream. Fix to return a negative error code from the kthread_run() error handling case instead of 0, as done elsewhere in this function. Fixes: cdd5de500b2c ("soc: ti: Add wkup_m3_ipc driver") Signed-off-by: Wei Yongjun Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- drivers/soc/ti/wkup_m3_ipc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 8823cc81ae45..5bb376009d98 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -459,6 +459,7 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) if (IS_ERR(task)) { dev_err(dev, "can't create rproc_boot thread\n"); + ret = PTR_ERR(task); goto err_put_rproc; } -- GitLab From fa555d021d2b87faffe156e642c781ce20b05932 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 19 Dec 2016 17:46:53 +0530 Subject: [PATCH 0839/1442] selftest/powerpc: Wrong PMC initialized in pmc56_overflow test commit df21d2fa733035e4d414379960f94b2516b41296 upstream. Test uses PMC2 to count the event. But PMC1 is being initialized. Patch to fix it. Fixes: 3752e453f6ba ('selftests/powerpc: Add tests of PMU EBBs') Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c index c22860ab9733..30e1ac62e8cb 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c @@ -66,7 +66,7 @@ int pmc56_overflow(void) FAIL_IF(ebb_event_enable(&event)); - mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + mtspr(SPRN_PMC2, pmc_sample_period(sample_period)); mtspr(SPRN_PMC5, 0); mtspr(SPRN_PMC6, 0); -- GitLab From 12274f2c17f2f7d5c0a0cfdc649410c9a37bb66c Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 29 Aug 2016 18:25:22 +0200 Subject: [PATCH 0840/1442] tools/virtio/ringtest: fix run-on-all.sh for offline cpus commit 21f5eda9b8671744539c8295b9df62991fffb2ce upstream. Since ef1b144d ("tools/virtio/ringtest: fix run-on-all.sh to work without /dev/cpu") run-on-all.sh uses seq 0 $HOST_AFFINITY as the list of ids of the CPUs to run the command on (assuming ids of online CPUs are consecutive and start from 0), where $HOST_AFFINITY is the highest CPU id in the system previously determined using lscpu. This can fail on systems with offline CPUs. Instead let's use lscpu to determine the list of online CPUs. Signed-off-by: Halil Pasic Fixes: ef1b144d ("tools/virtio/ringtest: fix run-on-all.sh to work without /dev/cpu") Reviewed-by: Sascha Silbe Signed-off-by: Cornelia Huck Signed-off-by: Greg Kroah-Hartman --- tools/virtio/ringtest/run-on-all.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh index 2e69ca812b4c..29b0d3920bfc 100755 --- a/tools/virtio/ringtest/run-on-all.sh +++ b/tools/virtio/ringtest/run-on-all.sh @@ -1,12 +1,13 @@ #!/bin/sh +CPUS_ONLINE=$(lscpu --online -p=cpu|grep -v -e '#') #use last CPU for host. Why not the first? #many devices tend to use cpu0 by default so #it tends to be busier -HOST_AFFINITY=$(lscpu -p=cpu | tail -1) +HOST_AFFINITY=$(echo "${CPUS_ONLINE}"|tail -n 1) #run command on all cpus -for cpu in $(seq 0 $HOST_AFFINITY) +for cpu in $CPUS_ONLINE do #Don't run guest and host on same CPU #It actually works ok if using signalling -- GitLab From 5b482bf5886855be44e89653f88297aab423dee6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:08 +0100 Subject: [PATCH 0841/1442] libceph: uninline ceph_crypto_key_destroy() commit 6db2304aabb070261ad34923bfd83c43dfb000e3 upstream. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/crypto.c | 8 ++++++++ net/ceph/crypto.h | 9 +-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index c48510385900..f19f31eeeef9 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -80,6 +80,14 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) return 0; } +void ceph_crypto_key_destroy(struct ceph_crypto_key *key) +{ + if (key) { + kfree(key->key); + key->key = NULL; + } +} + static struct crypto_skcipher *ceph_crypto_alloc_cipher(void) { return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index c33bcafd3829..c4211590b721 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -14,19 +14,12 @@ struct ceph_crypto_key { void *key; }; -static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) -{ - if (key) { - kfree(key->key); - key->key = NULL; - } -} - int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src); int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end); int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end); int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); +void ceph_crypto_key_destroy(struct ceph_crypto_key *key); /* crypto.c */ int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, -- GitLab From f77ef5348d4bfb498062930bd9af828f3ef2947e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:08 +0100 Subject: [PATCH 0842/1442] libceph: stop allocating a new cipher on every crypto request commit 7af3ea189a9a13f090de51c97f676215dabc1205 upstream. This is useless and more importantly not allowed on the writeback path, because crypto_alloc_skcipher() allocates memory with GFP_KERNEL, which can recurse back into the filesystem: kworker/9:3 D ffff92303f318180 0 20732 2 0x00000080 Workqueue: ceph-msgr ceph_con_workfn [libceph] ffff923035dd4480 ffff923038f8a0c0 0000000000000001 000000009eb27318 ffff92269eb28000 ffff92269eb27338 ffff923036b145ac ffff923035dd4480 00000000ffffffff ffff923036b145b0 ffffffff951eb4e1 ffff923036b145a8 Call Trace: [] ? schedule+0x31/0x80 [] ? schedule_preempt_disabled+0xa/0x10 [] ? __mutex_lock_slowpath+0xb4/0x130 [] ? mutex_lock+0x1b/0x30 [] ? xfs_reclaim_inodes_ag+0x233/0x2d0 [xfs] [] ? move_active_pages_to_lru+0x125/0x270 [] ? radix_tree_gang_lookup_tag+0xc5/0x1c0 [] ? __list_lru_walk_one.isra.3+0x33/0x120 [] ? xfs_reclaim_inodes_nr+0x31/0x40 [xfs] [] ? super_cache_scan+0x17e/0x190 [] ? shrink_slab.part.38+0x1e3/0x3d0 [] ? shrink_node+0x10a/0x320 [] ? do_try_to_free_pages+0xf4/0x350 [] ? try_to_free_pages+0xea/0x1b0 [] ? __alloc_pages_nodemask+0x61d/0xe60 [] ? cache_grow_begin+0x9d/0x560 [] ? fallback_alloc+0x148/0x1c0 [] ? __crypto_alloc_tfm+0x37/0x130 [] ? __kmalloc+0x1eb/0x580 [] ? crush_choose_firstn+0x3eb/0x470 [libceph] [] ? __crypto_alloc_tfm+0x37/0x130 [] ? crypto_spawn_tfm+0x39/0x60 [] ? crypto_cbc_init_tfm+0x23/0x40 [cbc] [] ? __crypto_alloc_tfm+0xcc/0x130 [] ? crypto_skcipher_init_tfm+0x113/0x180 [] ? crypto_create_tfm+0x43/0xb0 [] ? crypto_larval_lookup+0x150/0x150 [] ? crypto_alloc_tfm+0x72/0x120 [] ? ceph_aes_encrypt2+0x67/0x400 [libceph] [] ? ceph_pg_to_up_acting_osds+0x84/0x5b0 [libceph] [] ? release_sock+0x40/0x90 [] ? tcp_recvmsg+0x4b4/0xae0 [] ? ceph_encrypt2+0x54/0xc0 [libceph] [] ? ceph_x_encrypt+0x5d/0x90 [libceph] [] ? calcu_signature+0x5f/0x90 [libceph] [] ? ceph_x_sign_message+0x35/0x50 [libceph] [] ? prepare_write_message_footer+0x5c/0xa0 [libceph] [] ? ceph_con_workfn+0x2258/0x2dd0 [libceph] [] ? queue_con_delay+0x33/0xd0 [libceph] [] ? __submit_request+0x20d/0x2f0 [libceph] [] ? ceph_osdc_start_request+0x28/0x30 [libceph] [] ? rbd_queue_workfn+0x2f3/0x350 [rbd] [] ? process_one_work+0x160/0x410 [] ? worker_thread+0x4d/0x480 [] ? process_one_work+0x410/0x410 [] ? kthread+0xcd/0xf0 [] ? ret_from_fork+0x1f/0x40 [] ? kthread_create_on_node+0x190/0x190 Allocating the cipher along with the key fixes the issue - as long the key doesn't change, a single cipher context can be used concurrently in multiple requests. We still can't take that GFP_KERNEL allocation though. Both ceph_crypto_key_clone() and ceph_crypto_key_decode() are called from GFP_NOFS context, so resort to memalloc_noio_{save,restore}() here. Reported-by: Lucas Stach Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/crypto.c | 85 +++++++++++++++++++++++++++++++++-------------- net/ceph/crypto.h | 1 + 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index f19f31eeeef9..292e33bd916e 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -13,14 +13,60 @@ #include #include "crypto.h" +/* + * Set ->key and ->tfm. The rest of the key should be filled in before + * this function is called. + */ +static int set_secret(struct ceph_crypto_key *key, void *buf) +{ + unsigned int noio_flag; + int ret; + + key->key = NULL; + key->tfm = NULL; + + switch (key->type) { + case CEPH_CRYPTO_NONE: + return 0; /* nothing to do */ + case CEPH_CRYPTO_AES: + break; + default: + return -ENOTSUPP; + } + + WARN_ON(!key->len); + key->key = kmemdup(buf, key->len, GFP_NOIO); + if (!key->key) { + ret = -ENOMEM; + goto fail; + } + + /* crypto_alloc_skcipher() allocates with GFP_KERNEL */ + noio_flag = memalloc_noio_save(); + key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); + memalloc_noio_restore(noio_flag); + if (IS_ERR(key->tfm)) { + ret = PTR_ERR(key->tfm); + key->tfm = NULL; + goto fail; + } + + ret = crypto_skcipher_setkey(key->tfm, key->key, key->len); + if (ret) + goto fail; + + return 0; + +fail: + ceph_crypto_key_destroy(key); + return ret; +} + int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src) { memcpy(dst, src, sizeof(struct ceph_crypto_key)); - dst->key = kmemdup(src->key, src->len, GFP_NOFS); - if (!dst->key) - return -ENOMEM; - return 0; + return set_secret(dst, src->key); } int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) @@ -37,16 +83,16 @@ int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) { + int ret; + ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad); key->type = ceph_decode_16(p); ceph_decode_copy(p, &key->created, sizeof(key->created)); key->len = ceph_decode_16(p); ceph_decode_need(p, end, key->len, bad); - key->key = kmalloc(key->len, GFP_NOFS); - if (!key->key) - return -ENOMEM; - ceph_decode_copy(p, key->key, key->len); - return 0; + ret = set_secret(key, *p); + *p += key->len; + return ret; bad: dout("failed to decode crypto key\n"); @@ -85,14 +131,11 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key) if (key) { kfree(key->key); key->key = NULL; + crypto_free_skcipher(key->tfm); + key->tfm = NULL; } } -static struct crypto_skcipher *ceph_crypto_alloc_cipher(void) -{ - return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); -} - static const u8 *aes_iv = (u8 *)CEPH_AES_IV; /* @@ -168,8 +211,7 @@ static void teardown_sgtable(struct sg_table *sgt) static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len) { - struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher(); - SKCIPHER_REQUEST_ON_STACK(req, tfm); + SKCIPHER_REQUEST_ON_STACK(req, key->tfm); struct sg_table sgt; struct scatterlist prealloc_sg; char iv[AES_BLOCK_SIZE] __aligned(8); @@ -177,20 +219,15 @@ static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, int crypt_len = encrypt ? in_len + pad_byte : in_len; int ret; - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - WARN_ON(crypt_len > buf_len); if (encrypt) memset(buf + in_len, pad_byte, pad_byte); ret = setup_sgtable(&sgt, &prealloc_sg, buf, crypt_len); if (ret) - goto out_tfm; + return ret; - crypto_skcipher_setkey((void *)tfm, key->key, key->len); memcpy(iv, aes_iv, AES_BLOCK_SIZE); - - skcipher_request_set_tfm(req, tfm); + skcipher_request_set_tfm(req, key->tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv); @@ -232,8 +269,6 @@ static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, out_sgt: teardown_sgtable(&sgt); -out_tfm: - crypto_free_skcipher(tfm); return ret; } diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index c4211590b721..58d83aa7740f 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -12,6 +12,7 @@ struct ceph_crypto_key { struct ceph_timespec created; int len; void *key; + struct crypto_skcipher *tfm; }; int ceph_crypto_key_clone(struct ceph_crypto_key *dst, -- GitLab From 09f886dc5a6945679ed35e6acfbc746f472f0f34 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 26 Jan 2017 08:25:24 +0100 Subject: [PATCH 0843/1442] Linux 4.9.6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2a8af8af7b27..ef95231d1625 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 5 +SUBLEVEL = 6 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From eab46c4624613bedee0e8bfbe1db81c9bc05f19f Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 17 Nov 2014 21:11:23 -0800 Subject: [PATCH 0844/1442] ANDROID: usb: gadget: mtp/ptp: Migrate functions to the USB_FUNCTION interface This patch adds support to use mtp/ptp gadget functions through the DECLARE_USB_FUNCTION_INIT interface. enabling USB_CONFIGFS_F_MTP config compiles f_mtp.c thereby providing support for MTP gadget enabling USB_CONFIGFS_F_PTP config compiles f_ptp.c thereby providing support for PTP gadget Signed-off-by: Badhri Jagan Sridharan Change-Id: I38d7b570e8886d155ef10cd2c839b2232dcb3158 --- drivers/usb/gadget/Kconfig | 20 ++++ drivers/usb/gadget/Makefile | 5 + drivers/usb/gadget/f_mtp.c | 178 +++++++++++++++++++++++++++++++++++- drivers/usb/gadget/f_mtp.h | 18 ++++ drivers/usb/gadget/f_ptp.c | 38 ++++++++ 5 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 drivers/usb/gadget/f_mtp.h create mode 100644 drivers/usb/gadget/f_ptp.c diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 8ad203296079..1d6281a8eeea 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -209,6 +209,12 @@ config USB_F_PRINTER config USB_F_TCM tristate +config USB_F_MTP + tristate + +config USB_F_PTP + tristate + # this first set of drivers all depend on bulk-capable hardware. config USB_CONFIGFS @@ -362,6 +368,20 @@ config USB_CONFIGFS_F_FS implemented in kernel space (for instance Ethernet, serial or mass storage) and other are implemented in user space. +config USB_CONFIGFS_F_MTP + boolean "MTP gadget" + depends on USB_CONFIGFS + select USB_F_MTP + help + USB gadget MTP support + +config USB_CONFIGFS_F_PTP + boolean "PTP gadget" + depends on USB_CONFIGFS && USB_CONFIGFS_F_MTP + select USB_F_PTP + help + USB gadget PTP support + config USB_CONFIGFS_F_UAC1 bool "Audio Class 1.0" depends on USB_CONFIGFS diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile index 598a67d6ba05..502c379cce80 100644 --- a/drivers/usb/gadget/Makefile +++ b/drivers/usb/gadget/Makefile @@ -10,3 +10,8 @@ libcomposite-y := usbstring.o config.o epautoconf.o libcomposite-y += composite.o functions.o configfs.o u_f.o obj-$(CONFIG_USB_GADGET) += udc/ function/ legacy/ + +usb_f_mtp-y := f_mtp.o +obj-$(CONFIG_USB_F_MTP) += usb_f_mtp.o +usb_f_ptp-y := f_ptp.o +obj-$(CONFIG_USB_F_PTP) += usb_f_ptp.o diff --git a/drivers/usb/gadget/f_mtp.c b/drivers/usb/gadget/f_mtp.c index 620aeaaf2d72..82f6b2ebaebb 100644 --- a/drivers/usb/gadget/f_mtp.c +++ b/drivers/usb/gadget/f_mtp.c @@ -35,9 +35,14 @@ #include #include #include +#include +#include + +#include "configfs.h" #define MTP_BULK_BUFFER_SIZE 16384 #define INTR_BUFFER_SIZE 28 +#define MAX_INST_NAME_LEN 40 /* String IDs */ #define INTERFACE_STRING_INDEX 0 @@ -66,8 +71,9 @@ /* constants for device status */ #define MTP_RESPONSE_OK 0x2001 #define MTP_RESPONSE_DEVICE_BUSY 0x2019 +#define DRIVER_NAME "mtp" -static const char mtp_shortname[] = "mtp_usb"; +static const char mtp_shortname[] = DRIVER_NAME "_usb"; struct mtp_dev { struct usb_function function; @@ -280,6 +286,12 @@ struct mtp_data_header { __le32 transaction_id; }; +struct mtp_instance { + struct usb_function_instance func_inst; + const char *name; + struct mtp_dev *dev; +}; + /* temporary variable used between mtp_open() and mtp_gadget_bind() */ static struct mtp_dev *_mtp_dev; @@ -456,7 +468,7 @@ static int mtp_create_bulk_endpoints(struct mtp_dev *dev, return 0; fail: - printk(KERN_ERR "mtp_bind() could not allocate requests\n"); + pr_err("mtp_bind() could not allocate requests\n"); return -1; } @@ -1099,6 +1111,13 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f) return id; mtp_interface_desc.bInterfaceNumber = id; + if (mtp_string_defs[INTERFACE_STRING_INDEX].id == 0) { + ret = usb_string_id(c->cdev); + if (ret < 0) + return ret; + mtp_string_defs[INTERFACE_STRING_INDEX].id = ret; + mtp_interface_desc.iInterface = ret; + } /* allocate endpoints */ ret = mtp_create_bulk_endpoints(dev, &mtp_fullspeed_in_desc, &mtp_fullspeed_out_desc, &mtp_intr_desc); @@ -1126,6 +1145,7 @@ mtp_function_unbind(struct usb_configuration *c, struct usb_function *f) struct usb_request *req; int i; + mtp_string_defs[INTERFACE_STRING_INDEX].id = 0; while ((req = mtp_req_get(dev, &dev->tx_idle))) mtp_request_free(req, dev->ep_in); for (i = 0; i < RX_REQ_MAX; i++) @@ -1213,7 +1233,7 @@ static int mtp_bind_config(struct usb_configuration *c, bool ptp_config) } dev->cdev = c->cdev; - dev->function.name = "mtp"; + dev->function.name = DRIVER_NAME; dev->function.strings = mtp_strings; if (ptp_config) { dev->function.fs_descriptors = fs_ptp_descs; @@ -1230,12 +1250,16 @@ static int mtp_bind_config(struct usb_configuration *c, bool ptp_config) return usb_add_function(c, &dev->function); } -static int mtp_setup(void) +static int __mtp_setup(struct mtp_instance *fi_mtp) { struct mtp_dev *dev; int ret; dev = kzalloc(sizeof(*dev), GFP_KERNEL); + + if (fi_mtp != NULL) + fi_mtp->dev = dev; + if (!dev) return -ENOMEM; @@ -1273,6 +1297,17 @@ static int mtp_setup(void) return ret; } +static int mtp_setup(void) +{ + return __mtp_setup(NULL); +} + +static int mtp_setup_configfs(struct mtp_instance *fi_mtp) +{ + return __mtp_setup(fi_mtp); +} + + static void mtp_cleanup(void) { struct mtp_dev *dev = _mtp_dev; @@ -1285,3 +1320,138 @@ static void mtp_cleanup(void) _mtp_dev = NULL; kfree(dev); } + +static struct mtp_instance *to_mtp_instance(struct config_item *item) +{ + return container_of(to_config_group(item), struct mtp_instance, + func_inst.group); +} + +static void mtp_attr_release(struct config_item *item) +{ + struct mtp_instance *fi_mtp = to_mtp_instance(item); + usb_put_function_instance(&fi_mtp->func_inst); +} + +static struct configfs_item_operations mtp_item_ops = { + .release = mtp_attr_release, +}; + +static struct config_item_type mtp_func_type = { + .ct_item_ops = &mtp_item_ops, + .ct_owner = THIS_MODULE, +}; + + +static struct mtp_instance *to_fi_mtp(struct usb_function_instance *fi) +{ + return container_of(fi, struct mtp_instance, func_inst); +} + +static int mtp_set_inst_name(struct usb_function_instance *fi, const char *name) +{ + struct mtp_instance *fi_mtp; + char *ptr; + int name_len; + + name_len = strlen(name) + 1; + if (name_len > MAX_INST_NAME_LEN) + return -ENAMETOOLONG; + + ptr = kstrndup(name, name_len, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + fi_mtp = to_fi_mtp(fi); + fi_mtp->name = ptr; + + return 0; +} + +static void mtp_free_inst(struct usb_function_instance *fi) +{ + struct mtp_instance *fi_mtp; + + fi_mtp = to_fi_mtp(fi); + kfree(fi_mtp->name); + mtp_cleanup(); + kfree(fi_mtp); +} + +struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config) +{ + struct mtp_instance *fi_mtp; + int ret = 0; + + fi_mtp = kzalloc(sizeof(*fi_mtp), GFP_KERNEL); + if (!fi_mtp) + return ERR_PTR(-ENOMEM); + fi_mtp->func_inst.set_inst_name = mtp_set_inst_name; + fi_mtp->func_inst.free_func_inst = mtp_free_inst; + + if (mtp_config) { + ret = mtp_setup_configfs(fi_mtp); + if (ret) { + kfree(fi_mtp); + pr_err("Error setting MTP\n"); + return ERR_PTR(ret); + } + } else + fi_mtp->dev = _mtp_dev; + + config_group_init_type_name(&fi_mtp->func_inst.group, + "", &mtp_func_type); + + return &fi_mtp->func_inst; +} +EXPORT_SYMBOL_GPL(alloc_inst_mtp_ptp); + +static struct usb_function_instance *mtp_alloc_inst(void) +{ + return alloc_inst_mtp_ptp(true); +} + +static int mtp_ctrlreq_configfs(struct usb_function *f, + const struct usb_ctrlrequest *ctrl) +{ + return mtp_ctrlrequest(f->config->cdev, ctrl); +} + +static void mtp_free(struct usb_function *f) +{ + /*NO-OP: no function specific resource allocation in mtp_alloc*/ +} + +struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi, + bool mtp_config) +{ + struct mtp_instance *fi_mtp = to_fi_mtp(fi); + struct mtp_dev *dev = fi_mtp->dev; + + dev->function.name = DRIVER_NAME; + dev->function.strings = mtp_strings; + if (mtp_config) { + dev->function.fs_descriptors = fs_mtp_descs; + dev->function.hs_descriptors = hs_mtp_descs; + } else { + dev->function.fs_descriptors = fs_ptp_descs; + dev->function.hs_descriptors = hs_ptp_descs; + } + dev->function.bind = mtp_function_bind; + dev->function.unbind = mtp_function_unbind; + dev->function.set_alt = mtp_function_set_alt; + dev->function.disable = mtp_function_disable; + dev->function.setup = mtp_ctrlreq_configfs; + dev->function.free_func = mtp_free; + + return &dev->function; +} +EXPORT_SYMBOL_GPL(function_alloc_mtp_ptp); + +static struct usb_function *mtp_alloc(struct usb_function_instance *fi) +{ + return function_alloc_mtp_ptp(fi, true); +} + +DECLARE_USB_FUNCTION_INIT(mtp, mtp_alloc_inst, mtp_alloc); +MODULE_LICENSE("GPL"); diff --git a/drivers/usb/gadget/f_mtp.h b/drivers/usb/gadget/f_mtp.h new file mode 100644 index 000000000000..7adb1ff08eff --- /dev/null +++ b/drivers/usb/gadget/f_mtp.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2014 Google, Inc. + * Author: Badhri Jagan Sridharan + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +extern struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config); +extern struct usb_function *function_alloc_mtp_ptp( + struct usb_function_instance *fi, bool mtp_config); diff --git a/drivers/usb/gadget/f_ptp.c b/drivers/usb/gadget/f_ptp.c new file mode 100644 index 000000000000..da3e4d53e085 --- /dev/null +++ b/drivers/usb/gadget/f_ptp.c @@ -0,0 +1,38 @@ +/* + * Gadget Function Driver for PTP + * + * Copyright (C) 2014 Google, Inc. + * Author: Badhri Jagan Sridharan + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +#include +#include + +#include "f_mtp.h" + +static struct usb_function_instance *ptp_alloc_inst(void) +{ + return alloc_inst_mtp_ptp(false); +} + +static struct usb_function *ptp_alloc(struct usb_function_instance *fi) +{ + return function_alloc_mtp_ptp(fi, false); +} + +DECLARE_USB_FUNCTION_INIT(ptp, ptp_alloc_inst, ptp_alloc); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Badhri Jagan Sridharan"); -- GitLab From 26cf9896e3238fc6099d0cf67962cae5a14bd1cf Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 15 Dec 2014 16:42:27 -0800 Subject: [PATCH 0845/1442] ANDROID: usb: gadget: configfs: Add usb_function ptr to fi struct Add a pointer to the usb_function inside the usb_function_instance structure to service functions specific setup requests even before the function gets added to the usb_gadget Signed-off-by: Badhri Jagan Sridharan Change-Id: I6f457006f6c5516cc6986ec2acdf5b1ecf259d0c --- include/linux/usb/composite.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 4616a49a1c2e..93f0253a7e01 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -581,6 +581,7 @@ struct usb_function_instance { struct config_group group; struct list_head cfs_list; struct usb_function_driver *fd; + struct usb_function *f; int (*set_inst_name)(struct usb_function_instance *inst, const char *name); void (*free_func_inst)(struct usb_function_instance *inst); -- GitLab From 2187738259417d4bdf6d72100a88f3e34ad22a53 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 15 Dec 2014 10:44:47 -0800 Subject: [PATCH 0846/1442] ANDROID: usb: gadget: Add Uevent to notify userspace Android userspace UsbDeviceManager relies on the uevents generated by the composition driver to generate user notifications. This CL adds uevents to be generated whenever USB changes its state i.e. connected, disconnected, configured. This CL also intercepts the setup requests from the usb_core anb routes it to the specific usb function if required. Signed-off-by: Badhri Jagan Sridharan Change-Id: Ib3d3a78255a532f7449dac286f776c2966caf8c1 --- drivers/usb/gadget/Kconfig | 8 ++ drivers/usb/gadget/configfs.c | 154 +++++++++++++++++++++++++++++++++- 2 files changed, 159 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 1d6281a8eeea..22ed99a836ed 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -382,6 +382,14 @@ config USB_CONFIGFS_F_PTP help USB gadget PTP support +config USB_CONFIGFS_UEVENT + boolean "Uevent notification of Gadget state" + depends on USB_CONFIGFS + help + Enable uevent notifications to userspace when the gadget + state changes. The gadget can be in any of the following + three states: "CONNECTED/DISCONNECTED/CONFIGURED" + config USB_CONFIGFS_F_UAC1 bool "Audio Class 1.0" depends on USB_CONFIGFS diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 3984787f8e97..532be07a8d45 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -9,6 +9,20 @@ #include "u_f.h" #include "u_os_desc.h" +#ifdef CONFIG_USB_CONFIGFS_UEVENT +#include +#include +#include +#include "u_fs.h" + +#ifdef CONFIG_USB_CONFIGFS_F_ACC +extern int acc_ctrlrequest(struct usb_composite_dev *cdev, + const struct usb_ctrlrequest *ctrl); +void acc_disconnect(void); +#endif +static struct class *android_class; +#endif + int check_user_usb_string(const char *name, struct usb_gadget_strings *stringtab_dev) { @@ -60,6 +74,12 @@ struct gadget_info { bool use_os_desc; char b_vendor_code; char qw_sign[OS_STRING_QW_SIGN_LEN]; +#ifdef CONFIG_USB_CONFIGFS_UEVENT + bool connected; + bool sw_connected; + struct work_struct work; + struct device *dev; +#endif }; static inline struct gadget_info *to_gadget_info(struct config_item *item) @@ -265,7 +285,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, mutex_lock(&gi->lock); - if (!strlen(name)) { + if (!strlen(name) || strcmp(name, "none") == 0) { ret = unregister_gadget(gi); if (ret) goto err; @@ -1367,6 +1387,57 @@ static int configfs_composite_bind(struct usb_gadget *gadget, return ret; } +#ifdef CONFIG_USB_CONFIGFS_UEVENT +static void android_work(struct work_struct *data) +{ + struct gadget_info *gi = container_of(data, struct gadget_info, work); + struct usb_composite_dev *cdev = &gi->cdev; + char *disconnected[2] = { "USB_STATE=DISCONNECTED", NULL }; + char *connected[2] = { "USB_STATE=CONNECTED", NULL }; + char *configured[2] = { "USB_STATE=CONFIGURED", NULL }; + /* 0-connected 1-configured 2-disconnected*/ + bool status[3] = { false, false, false }; + unsigned long flags; + bool uevent_sent = false; + + spin_lock_irqsave(&cdev->lock, flags); + if (cdev->config) + status[1] = true; + + if (gi->connected != gi->sw_connected) { + if (gi->connected) + status[0] = true; + else + status[2] = true; + gi->sw_connected = gi->connected; + } + spin_unlock_irqrestore(&cdev->lock, flags); + + if (status[0]) { + kobject_uevent_env(&gi->dev->kobj, KOBJ_CHANGE, connected); + pr_info("%s: sent uevent %s\n", __func__, connected[0]); + uevent_sent = true; + } + + if (status[1]) { + kobject_uevent_env(&gi->dev->kobj, KOBJ_CHANGE, configured); + pr_info("%s: sent uevent %s\n", __func__, configured[0]); + uevent_sent = true; + } + + if (status[2]) { + kobject_uevent_env(&gi->dev->kobj, KOBJ_CHANGE, disconnected); + pr_info("%s: sent uevent %s\n", __func__, disconnected[0]); + uevent_sent = true; + } + + if (!uevent_sent) { + pr_info("%s: did not send uevent (%d %d %p)\n", __func__, + gi->connected, gi->sw_connected, cdev->config); + } +} +#endif + static void configfs_composite_unbind(struct usb_gadget *gadget) { struct usb_composite_dev *cdev; @@ -1386,14 +1457,78 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) set_gadget_data(gadget, NULL); } +#ifdef CONFIG_USB_CONFIGFS_UEVENT +static int android_setup(struct usb_gadget *gadget, + const struct usb_ctrlrequest *c) +{ + struct usb_composite_dev *cdev = get_gadget_data(gadget); + unsigned long flags; + struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev); + int value = -EOPNOTSUPP; + struct usb_function_instance *fi; + + spin_lock_irqsave(&cdev->lock, flags); + if (!gi->connected) { + gi->connected = 1; + schedule_work(&gi->work); + } + spin_unlock_irqrestore(&cdev->lock, flags); + list_for_each_entry(fi, &gi->available_func, cfs_list) { + if (fi != NULL && fi->f != NULL && fi->f->setup != NULL) { + value = fi->f->setup(fi->f, c); + if (value >= 0) + break; + } + } + +#ifdef CONFIG_USB_CONFIGFS_F_ACC + if (value < 0) + value = acc_ctrlrequest(cdev, c); +#endif + + if (value < 0) + value = composite_setup(gadget, c); + + spin_lock_irqsave(&cdev->lock, flags); + if (c->bRequest == USB_REQ_SET_CONFIGURATION && + cdev->config) { + schedule_work(&gi->work); + } + spin_unlock_irqrestore(&cdev->lock, flags); + + return value; +} + +static void android_disconnect(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev = get_gadget_data(gadget); + struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev); + + /* accessory HID support can be active while the + accessory function is not actually enabled, + so we need to inform it when we are disconnected. + */ + +#ifdef CONFIG_USB_CONFIGFS_F_ACC + acc_disconnect(); +#endif + gi->connected = 0; + schedule_work(&gi->work); + composite_disconnect(gadget); +} +#endif + static const struct usb_gadget_driver configfs_driver_template = { .bind = configfs_composite_bind, .unbind = configfs_composite_unbind, - +#ifdef CONFIG_USB_CONFIGFS_UEVENT + .setup = android_setup, + .disconnect = android_disconnect, +#else .setup = composite_setup, .reset = composite_disconnect, .disconnect = composite_disconnect, - +#endif .suspend = composite_suspend, .resume = composite_resume, @@ -1453,6 +1588,12 @@ static struct config_group *gadgets_make( gi->composite.gadget_driver.function = kstrdup(name, GFP_KERNEL); gi->composite.name = gi->composite.gadget_driver.function; +#ifdef CONFIG_USB_CONFIGFS_UEVENT + INIT_WORK(&gi->work, android_work); + gi->dev = device_create(android_class, NULL, + MKDEV(0, 0), NULL, "android0"); +#endif + if (!gi->composite.gadget_driver.function) goto err; @@ -1504,6 +1645,13 @@ static int __init gadget_cfs_init(void) config_group_init(&gadget_subsys.su_group); ret = configfs_register_subsystem(&gadget_subsys); + +#ifdef CONFIG_USB_CONFIGFS_UEVENT + android_class = class_create(THIS_MODULE, "android_usb"); + if (IS_ERR(android_class)) + return PTR_ERR(android_class); +#endif + return ret; } module_init(gadget_cfs_init); -- GitLab From 65758ce5a977b3e24830522c225475c4a1046103 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 16 Jan 2015 05:41:10 +0530 Subject: [PATCH 0847/1442] ANDROID: usb: gadget: check for accessory device before disconnecting HIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While disabling ConfigFS Android gadget, android_disconnect() calls kill_all_hid_devices(), if CONFIG_USB_CONFIGFS_F_ACC is enabled, to free the registered HIDs without checking whether the USB accessory device really exist or not. If USB accessory device doesn't exist then we run into following kernel panic: ----8<---- [  136.724761] Unable to handle kernel NULL pointer dereference at virtual address 00000064 [  136.724809] pgd = c0204000 [  136.731924] [00000064] *pgd=00000000 [  136.737830] Internal error: Oops: 5 [#1] SMP ARM [  136.738108] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.18.0-rc4-00400-gf75300e-dirty #76 [  136.742788] task: c0fb19d8 ti: c0fa4000 task.ti: c0fa4000 [  136.750890] PC is at _raw_spin_lock_irqsave+0x24/0x60 [  136.756246] LR is at kill_all_hid_devices+0x24/0x114 ---->8---- This patch adds a test to check if USB Accessory device exists before freeing HIDs. Change-Id: Ie229feaf0de3f4f7a151fcaa9a994e34e15ff73b Signed-off-by: Amit Pundir --- drivers/usb/gadget/f_accessory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index f8490c034107..9ffe017bf1cf 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -929,6 +929,10 @@ kill_all_hid_devices(struct acc_dev *dev) struct list_head *entry, *temp; unsigned long flags; + /* do nothing if usb accessory device doesn't exist */ + if (!dev) + return; + spin_lock_irqsave(&dev->lock, flags); list_for_each_safe(entry, temp, &dev->hid_list) { hid = list_entry(entry, struct acc_hid_dev, list); -- GitLab From 6d9285e2574a00ef15a468314787a613d6976571 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 25 Mar 2015 14:37:23 -0700 Subject: [PATCH 0848/1442] ANDROID: usb: gadget: f_audio_source:replace deprecated API Replace snd_card_create with snd_card_new. snd_card_create depcrecated starting form v3.15 Signed-off-by: Badhri Jagan Sridharan Change-Id: I76f7d753812963d595055bce7d3e6518163482f5 --- drivers/usb/gadget/f_audio_source.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c index 21ced13c83d8..4b9786b48950 100644 --- a/drivers/usb/gadget/f_audio_source.c +++ b/drivers/usb/gadget/f_audio_source.c @@ -788,16 +788,16 @@ int audio_source_bind_config(struct usb_configuration *c, audio = &_audio_dev; - err = snd_card_create(SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, + err = snd_card_new(&c->cdev->gadget->dev, + SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, THIS_MODULE, 0, &card); if (err) return err; - snd_card_set_dev(card, &c->cdev->gadget->dev); - err = snd_pcm_new(card, "USB audio source", 0, 1, 0, &pcm); if (err) goto pcm_fail; + pcm->private_data = audio; pcm->info_flags = 0; audio->pcm = pcm; -- GitLab From d6b2d77cd67268f69cba11685b496ca2afce7e5f Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 27 Mar 2015 14:15:19 -0700 Subject: [PATCH 0849/1442] ANDROID: usb: gadget: Add function devices to the parent Added create_function_device to create child function devices for USB gadget functions. Android UsbDeviceManager relies on communicating to the devices created by the gadget functions to implement functions such as audio_source. Signed-off-by: Badhri Jagan Sridharan Change-Id: I0df9ad86ac32d8cdacdea164e9fed49891b45fc2 --- drivers/usb/gadget/configfs.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 532be07a8d45..3cff594aaeea 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -13,7 +13,6 @@ #include #include #include -#include "u_fs.h" #ifdef CONFIG_USB_CONFIGFS_F_ACC extern int acc_ctrlrequest(struct usb_composite_dev *cdev, @@ -21,6 +20,18 @@ extern int acc_ctrlrequest(struct usb_composite_dev *cdev, void acc_disconnect(void); #endif static struct class *android_class; +static struct device *android_device; +static int index; + +struct device *create_function_device(char *name) +{ + if (android_device && !IS_ERR(android_device)) + return device_create(android_class, android_device, + MKDEV(0, index++), NULL, name); + else + return ERR_PTR(-EINVAL); +} +EXPORT_SYMBOL_GPL(create_function_device); #endif int check_user_usb_string(const char *name, @@ -1414,19 +1425,22 @@ static void android_work(struct work_struct *data) spin_unlock_irqrestore(&cdev->lock, flags); if (status[0]) { - kobject_uevent_env(&gi->dev->kobj, KOBJ_CHANGE, connected); + kobject_uevent_env(&android_device->kobj, + KOBJ_CHANGE, connected); pr_info("%s: sent uevent %s\n", __func__, connected[0]); uevent_sent = true; } if (status[1]) { - kobject_uevent_env(&gi->dev->kobj, KOBJ_CHANGE, configured); + kobject_uevent_env(&android_device->kobj, + KOBJ_CHANGE, configured); pr_info("%s: sent uevent %s\n", __func__, configured[0]); uevent_sent = true; } if (status[2]) { - kobject_uevent_env(&gi->dev->kobj, KOBJ_CHANGE, disconnected); + kobject_uevent_env(&android_device->kobj, + KOBJ_CHANGE, disconnected); pr_info("%s: sent uevent %s\n", __func__, disconnected[0]); uevent_sent = true; } @@ -1590,8 +1604,10 @@ static struct config_group *gadgets_make( #ifdef CONFIG_USB_CONFIGFS_UEVENT INIT_WORK(&gi->work, android_work); - gi->dev = device_create(android_class, NULL, + android_device = device_create(android_class, NULL, MKDEV(0, 0), NULL, "android0"); + if (IS_ERR(android_device)) + goto err; #endif if (!gi->composite.gadget_driver.function) @@ -1606,6 +1622,9 @@ static struct config_group *gadgets_make( static void gadgets_drop(struct config_group *group, struct config_item *item) { config_item_put(item); +#ifdef CONFIG_USB_CONFIGFS_UEVENT + device_destroy(android_device->class, android_device->devt); +#endif } static struct configfs_group_operations gadgets_ops = { @@ -1659,5 +1678,10 @@ module_init(gadget_cfs_init); static void __exit gadget_cfs_exit(void) { configfs_unregister_subsystem(&gadget_subsys); +#ifdef CONFIG_USB_CONFIGFS_UEVENT + if (!IS_ERR(android_class)) + class_destroy(android_class); +#endif + } module_exit(gadget_cfs_exit); -- GitLab From 743a13cbcab9035e7eade5bd4c4e1fba5feef636 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sun, 23 Nov 2014 13:51:28 -0800 Subject: [PATCH 0850/1442] ANDROID: usb:gadget:audio_source: Move to USB_FUNCTION API This patch adds support to use audio_source gadget function through DECLARE_USB_FUNCTION_INIT interface. Signed-off-by: Badhri Jagan Sridharan Change-Id: I1fc6c9ea07105ae4eb785eebd3bb925bfdd8bc6b --- drivers/usb/gadget/Kconfig | 10 ++ drivers/usb/gadget/Makefile | 2 + drivers/usb/gadget/f_audio_source.c | 234 +++++++++++++++++++++++++++- 3 files changed, 239 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 22ed99a836ed..eea7fe0ca8ee 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -213,6 +213,9 @@ config USB_F_MTP tristate config USB_F_PTP + tristate + +config USB_F_AUDIO_SRC tristate # this first set of drivers all depend on bulk-capable hardware. @@ -382,6 +385,13 @@ config USB_CONFIGFS_F_PTP help USB gadget PTP support +config USB_CONFIGFS_F_AUDIO_SRC + boolean "Audio Source gadget" + depends on USB_CONFIGFS + select USB_F_AUDIO_SRC + help + USB gadget Audio Source support + config USB_CONFIGFS_UEVENT boolean "Uevent notification of Gadget state" depends on USB_CONFIGFS diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile index 502c379cce80..386ed6343e33 100644 --- a/drivers/usb/gadget/Makefile +++ b/drivers/usb/gadget/Makefile @@ -15,3 +15,5 @@ usb_f_mtp-y := f_mtp.o obj-$(CONFIG_USB_F_MTP) += usb_f_mtp.o usb_f_ptp-y := f_ptp.o obj-$(CONFIG_USB_F_PTP) += usb_f_ptp.o +usb_f_audio_source-y := f_audio_source.o +obj-$(CONFIG_USB_F_AUDIO_SRC) += usb_f_audio_source.o diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c index 4b9786b48950..39645be93502 100644 --- a/drivers/usb/gadget/f_audio_source.c +++ b/drivers/usb/gadget/f_audio_source.c @@ -21,6 +21,13 @@ #include #include +#include +#include +#include +#include +#include +#include +#include #define SAMPLE_RATE 44100 #define FRAMES_PER_MSEC (SAMPLE_RATE / 1000) @@ -32,6 +39,7 @@ #define AUDIO_AC_INTERFACE 0 #define AUDIO_AS_INTERFACE 1 #define AUDIO_NUM_INTERFACES 2 +#define MAX_INST_NAME_LEN 40 /* B.3.1 Standard AC Interface Descriptor */ static struct usb_interface_descriptor ac_interface_desc = { @@ -259,6 +267,7 @@ struct audio_dev { ktime_t start_time; /* number of frames sent since start_time */ s64 frames_sent; + struct audio_source_config *config; }; static inline struct audio_dev *func_to_audio(struct usb_function *f) @@ -268,6 +277,36 @@ static inline struct audio_dev *func_to_audio(struct usb_function *f) /*-------------------------------------------------------------------------*/ +struct audio_source_instance { + struct usb_function_instance func_inst; + const char *name; + struct audio_source_config *config; + struct device *audio_device; +}; + +static void audio_source_attr_release(struct config_item *item); + +static struct configfs_item_operations audio_source_item_ops = { + .release = audio_source_attr_release, +}; + +static struct config_item_type audio_source_func_type = { + .ct_item_ops = &audio_source_item_ops, + .ct_owner = THIS_MODULE, +}; + +static ssize_t audio_source_pcm_show(struct device *dev, + struct device_attribute *attr, char *buf); + +static DEVICE_ATTR(pcm, S_IRUGO, audio_source_pcm_show, NULL); + +static struct device_attribute *audio_source_function_attributes[] = { + &dev_attr_pcm, + NULL +}; + +/*--------------------------------------------------------------------------*/ + static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); @@ -561,6 +600,13 @@ static void audio_build_desc(struct audio_dev *audio) memcpy(sam_freq, &rate, 3); } + +static int snd_card_setup(struct usb_configuration *c, + struct audio_source_config *config); +static struct audio_source_instance *to_fi_audio_source( + const struct usb_function_instance *fi); + + /* audio function driver setup/binding */ static int audio_bind(struct usb_configuration *c, struct usb_function *f) @@ -571,6 +617,18 @@ audio_bind(struct usb_configuration *c, struct usb_function *f) struct usb_ep *ep; struct usb_request *req; int i; + int err; + + if (IS_ENABLED(CONFIG_USB_CONFIGFS)) { + struct audio_source_instance *fi_audio = + to_fi_audio_source(f->fi); + struct audio_source_config *config = + fi_audio->config; + + err = snd_card_setup(c, config); + if (err) + return err; + } audio_build_desc(audio); @@ -636,6 +694,16 @@ audio_unbind(struct usb_configuration *c, struct usb_function *f) audio->pcm = NULL; audio->substream = NULL; audio->in_ep = NULL; + + if (IS_ENABLED(CONFIG_USB_CONFIGFS)) { + struct audio_source_instance *fi_audio = + to_fi_audio_source(f->fi); + struct audio_source_config *config = + fi_audio->config; + + config->card = -1; + config->device = -1; + } } static void audio_pcm_playback_start(struct audio_dev *audio) @@ -779,8 +847,6 @@ int audio_source_bind_config(struct usb_configuration *c, struct audio_source_config *config) { struct audio_dev *audio; - struct snd_card *card; - struct snd_pcm *pcm; int err; config->card = -1; @@ -788,6 +854,31 @@ int audio_source_bind_config(struct usb_configuration *c, audio = &_audio_dev; + err = snd_card_setup(c, config); + if (err) + return err; + + err = usb_add_function(c, &audio->func); + if (err) + goto add_fail; + + return 0; + +add_fail: + snd_card_free(audio->card); + return err; +} + +static int snd_card_setup(struct usb_configuration *c, + struct audio_source_config *config) +{ + struct audio_dev *audio; + struct snd_card *card; + struct snd_pcm *pcm; + int err; + + audio = &_audio_dev; + err = snd_card_new(&c->cdev->gadget->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, THIS_MODULE, 0, &card); @@ -817,18 +908,147 @@ int audio_source_bind_config(struct usb_configuration *c, if (err) goto register_fail; - err = usb_add_function(c, &audio->func); - if (err) - goto add_fail; - config->card = pcm->card->number; config->device = pcm->device; audio->card = card; return 0; -add_fail: register_fail: pcm_fail: snd_card_free(audio->card); return err; } + +static struct audio_source_instance *to_audio_source_instance( + struct config_item *item) +{ + return container_of(to_config_group(item), struct audio_source_instance, + func_inst.group); +} + +static struct audio_source_instance *to_fi_audio_source( + const struct usb_function_instance *fi) +{ + return container_of(fi, struct audio_source_instance, func_inst); +} + +static void audio_source_attr_release(struct config_item *item) +{ + struct audio_source_instance *fi_audio = to_audio_source_instance(item); + + usb_put_function_instance(&fi_audio->func_inst); +} + +static int audio_source_set_inst_name(struct usb_function_instance *fi, + const char *name) +{ + struct audio_source_instance *fi_audio; + char *ptr; + int name_len; + + name_len = strlen(name) + 1; + if (name_len > MAX_INST_NAME_LEN) + return -ENAMETOOLONG; + + ptr = kstrndup(name, name_len, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + fi_audio = to_fi_audio_source(fi); + fi_audio->name = ptr; + + return 0; +} + +static void audio_source_free_inst(struct usb_function_instance *fi) +{ + struct audio_source_instance *fi_audio; + + fi_audio = to_fi_audio_source(fi); + device_destroy(fi_audio->audio_device->class, + fi_audio->audio_device->devt); + kfree(fi_audio->name); + kfree(fi_audio->config); +} + +static ssize_t audio_source_pcm_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct audio_source_instance *fi_audio = dev_get_drvdata(dev); + struct audio_source_config *config = fi_audio->config; + + /* print PCM card and device numbers */ + return sprintf(buf, "%d %d\n", config->card, config->device); +} + +struct device *create_function_device(char *name); + +static struct usb_function_instance *audio_source_alloc_inst(void) +{ + struct audio_source_instance *fi_audio; + struct device_attribute **attrs; + struct device_attribute *attr; + struct device *dev; + void *err_ptr; + int err = 0; + + fi_audio = kzalloc(sizeof(*fi_audio), GFP_KERNEL); + if (!fi_audio) + return ERR_PTR(-ENOMEM); + + fi_audio->func_inst.set_inst_name = audio_source_set_inst_name; + fi_audio->func_inst.free_func_inst = audio_source_free_inst; + + fi_audio->config = kzalloc(sizeof(struct audio_source_config), + GFP_KERNEL); + if (!fi_audio->config) { + err_ptr = ERR_PTR(-ENOMEM); + goto fail_audio; + } + + config_group_init_type_name(&fi_audio->func_inst.group, "", + &audio_source_func_type); + dev = create_function_device("f_audio_source"); + + if (IS_ERR(dev)) { + err_ptr = dev; + goto fail_audio_config; + } + + fi_audio->config->card = -1; + fi_audio->config->device = -1; + fi_audio->audio_device = dev; + + attrs = audio_source_function_attributes; + if (attrs) { + while ((attr = *attrs++) && !err) + err = device_create_file(dev, attr); + if (err) { + err_ptr = ERR_PTR(-EINVAL); + goto fail_device; + } + } + + dev_set_drvdata(dev, fi_audio); + _audio_dev.config = fi_audio->config; + + return &fi_audio->func_inst; + +fail_device: + device_destroy(dev->class, dev->devt); +fail_audio_config: + kfree(fi_audio->config); +fail_audio: + kfree(fi_audio); + return err_ptr; + +} + +static struct usb_function *audio_source_alloc(struct usb_function_instance *fi) +{ + return &_audio_dev.func; +} + +DECLARE_USB_FUNCTION_INIT(audio_source, audio_source_alloc_inst, + audio_source_alloc); +MODULE_LICENSE("GPL"); -- GitLab From f188487ff6ebb0da75cdfcdf49c79e3beef727e2 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 27 Mar 2015 14:49:55 -0700 Subject: [PATCH 0851/1442] ANDROID: usb: gadget: Move gadget functions code 3.18 kernel has reorganized drivers/usb/gadget directory. Moving gadget functions drivers from drivers/usb/gadget to drivers/usb/gadget/function Signed-off-by: Badhri Jagan Sridharan Change-Id: I1eab0190f8d42e3be1b4e91ad3bc3a2dc853b0ef --- drivers/usb/gadget/Makefile | 7 ------- drivers/usb/gadget/function/Makefile | 6 ++++++ drivers/usb/gadget/{ => function}/f_audio_source.c | 0 drivers/usb/gadget/{ => function}/f_mtp.c | 0 drivers/usb/gadget/{ => function}/f_mtp.h | 0 drivers/usb/gadget/{ => function}/f_ptp.c | 0 6 files changed, 6 insertions(+), 7 deletions(-) rename drivers/usb/gadget/{ => function}/f_audio_source.c (100%) rename drivers/usb/gadget/{ => function}/f_mtp.c (100%) rename drivers/usb/gadget/{ => function}/f_mtp.h (100%) rename drivers/usb/gadget/{ => function}/f_ptp.c (100%) diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile index 386ed6343e33..598a67d6ba05 100644 --- a/drivers/usb/gadget/Makefile +++ b/drivers/usb/gadget/Makefile @@ -10,10 +10,3 @@ libcomposite-y := usbstring.o config.o epautoconf.o libcomposite-y += composite.o functions.o configfs.o u_f.o obj-$(CONFIG_USB_GADGET) += udc/ function/ legacy/ - -usb_f_mtp-y := f_mtp.o -obj-$(CONFIG_USB_F_MTP) += usb_f_mtp.o -usb_f_ptp-y := f_ptp.o -obj-$(CONFIG_USB_F_PTP) += usb_f_ptp.o -usb_f_audio_source-y := f_audio_source.o -obj-$(CONFIG_USB_F_AUDIO_SRC) += usb_f_audio_source.o diff --git a/drivers/usb/gadget/function/Makefile b/drivers/usb/gadget/function/Makefile index cb8c225e8549..8e6824f91e93 100644 --- a/drivers/usb/gadget/function/Makefile +++ b/drivers/usb/gadget/function/Makefile @@ -46,3 +46,9 @@ usb_f_printer-y := f_printer.o obj-$(CONFIG_USB_F_PRINTER) += usb_f_printer.o usb_f_tcm-y := f_tcm.o obj-$(CONFIG_USB_F_TCM) += usb_f_tcm.o +usb_f_mtp-y := f_mtp.o +obj-$(CONFIG_USB_F_MTP) += usb_f_mtp.o +usb_f_ptp-y := f_ptp.o +obj-$(CONFIG_USB_F_PTP) += usb_f_ptp.o +usb_f_audio_source-y := f_audio_source.o +obj-$(CONFIG_USB_F_AUDIO_SRC) += usb_f_audio_source.o diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c similarity index 100% rename from drivers/usb/gadget/f_audio_source.c rename to drivers/usb/gadget/function/f_audio_source.c diff --git a/drivers/usb/gadget/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c similarity index 100% rename from drivers/usb/gadget/f_mtp.c rename to drivers/usb/gadget/function/f_mtp.c diff --git a/drivers/usb/gadget/f_mtp.h b/drivers/usb/gadget/function/f_mtp.h similarity index 100% rename from drivers/usb/gadget/f_mtp.h rename to drivers/usb/gadget/function/f_mtp.h diff --git a/drivers/usb/gadget/f_ptp.c b/drivers/usb/gadget/function/f_ptp.c similarity index 100% rename from drivers/usb/gadget/f_ptp.c rename to drivers/usb/gadget/function/f_ptp.c -- GitLab From f9247cea0591ede12a9fcc9dcd1d91b1617d4db8 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sun, 23 Nov 2014 17:21:22 -0800 Subject: [PATCH 0852/1442] ANDROID: usb: gadget: Accessory:Migrate to USB_FUNCTION API This patch adds support to use Android accessory gadget function through the DECLARE_USB_FUNCTION_INIT interface. Signed-off-by: Badhri Jagan Sridharan Change-Id: Ib352752d5bc905fa1df9049b53eabf1294930db7 --- drivers/usb/gadget/Kconfig | 12 ++- drivers/usb/gadget/Makefile | 3 + drivers/usb/gadget/f_accessory.c | 153 ++++++++++++++++++++++++++++++- 3 files changed, 164 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index eea7fe0ca8ee..7e7402af6edc 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -218,6 +218,9 @@ config USB_F_PTP config USB_F_AUDIO_SRC tristate +config USB_F_ACC + tristate + # this first set of drivers all depend on bulk-capable hardware. config USB_CONFIGFS @@ -385,9 +388,16 @@ config USB_CONFIGFS_F_PTP help USB gadget PTP support +config USB_CONFIGFS_F_ACC + boolean "Accessory gadget" + depends on USB_CONFIGFS + select USB_F_ACC + help + USB gadget Accessory support + config USB_CONFIGFS_F_AUDIO_SRC boolean "Audio Source gadget" - depends on USB_CONFIGFS + depends on USB_CONFIGFS && USB_CONFIGFS_F_ACC select USB_F_AUDIO_SRC help USB gadget Audio Source support diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile index 598a67d6ba05..1b55d76ee049 100644 --- a/drivers/usb/gadget/Makefile +++ b/drivers/usb/gadget/Makefile @@ -9,4 +9,7 @@ obj-$(CONFIG_USB_LIBCOMPOSITE) += libcomposite.o libcomposite-y := usbstring.o config.o epautoconf.o libcomposite-y += composite.o functions.o configfs.o u_f.o +usb_f_accessory-y := f_accessory.o +obj-$(CONFIG_USB_F_ACC) += usb_f_accessory.o + obj-$(CONFIG_USB_GADGET) += udc/ function/ legacy/ diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 9ffe017bf1cf..d9db5c595b99 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -39,6 +39,10 @@ #include #include +#include +#include + +#define MAX_INST_NAME_LEN 40 #define BULK_BUFFER_SIZE 16384 #define ACC_STRING_SIZE 256 @@ -194,6 +198,11 @@ static struct usb_gadget_strings *acc_strings[] = { /* temporary variable used between acc_open() and acc_gadget_bind() */ static struct acc_dev *_acc_dev; +struct acc_instance { + struct usb_function_instance func_inst; + const char *name; +}; + static inline struct acc_dev *func_to_dev(struct usb_function *f) { return container_of(f, struct acc_dev, function); @@ -775,7 +784,7 @@ static struct hid_driver acc_hid_driver = { .probe = acc_hid_probe, }; -static int acc_ctrlrequest(struct usb_composite_dev *cdev, +int acc_ctrlrequest(struct usb_composite_dev *cdev, const struct usb_ctrlrequest *ctrl) { struct acc_dev *dev = _acc_dev; @@ -879,9 +888,11 @@ static int acc_ctrlrequest(struct usb_composite_dev *cdev, w_value, w_index, w_length); return value; } +EXPORT_SYMBOL_GPL(acc_ctrlrequest); static int -acc_function_bind(struct usb_configuration *c, struct usb_function *f) +__acc_function_bind(struct usb_configuration *c, + struct usb_function *f, bool configfs) { struct usb_composite_dev *cdev = c->cdev; struct acc_dev *dev = func_to_dev(f); @@ -890,6 +901,16 @@ acc_function_bind(struct usb_configuration *c, struct usb_function *f) DBG(cdev, "acc_function_bind dev: %p\n", dev); + if (configfs) { + if (acc_string_defs[INTERFACE_STRING_INDEX].id == 0) { + ret = usb_string_id(c->cdev); + if (ret < 0) + return ret; + acc_string_defs[INTERFACE_STRING_INDEX].id = ret; + acc_interface_desc.iInterface = ret; + } + dev->cdev = c->cdev; + } ret = hid_register_driver(&acc_hid_driver); if (ret) return ret; @@ -922,6 +943,17 @@ acc_function_bind(struct usb_configuration *c, struct usb_function *f) return 0; } +static int +acc_function_bind(struct usb_configuration *c, struct usb_function *f) { + return __acc_function_bind(c, f, false); +} + +static int +acc_function_bind_configfs(struct usb_configuration *c, + struct usb_function *f) { + return __acc_function_bind(c, f, true); +} + static void kill_all_hid_devices(struct acc_dev *dev) { @@ -1179,11 +1211,12 @@ static int acc_setup(void) return ret; } -static void acc_disconnect(void) +void acc_disconnect(void) { /* unregister all HID devices if USB is disconnected */ kill_all_hid_devices(_acc_dev); } +EXPORT_SYMBOL_GPL(acc_disconnect); static void acc_cleanup(void) { @@ -1191,3 +1224,117 @@ static void acc_cleanup(void) kfree(_acc_dev); _acc_dev = NULL; } +static struct acc_instance *to_acc_instance(struct config_item *item) +{ + return container_of(to_config_group(item), struct acc_instance, + func_inst.group); +} + +static void acc_attr_release(struct config_item *item) +{ + struct acc_instance *fi_acc = to_acc_instance(item); + + usb_put_function_instance(&fi_acc->func_inst); +} + +static struct configfs_item_operations acc_item_ops = { + .release = acc_attr_release, +}; + +static struct config_item_type acc_func_type = { + .ct_item_ops = &acc_item_ops, + .ct_owner = THIS_MODULE, +}; + +static struct acc_instance *to_fi_acc(struct usb_function_instance *fi) +{ + return container_of(fi, struct acc_instance, func_inst); +} + +static int acc_set_inst_name(struct usb_function_instance *fi, const char *name) +{ + struct acc_instance *fi_acc; + char *ptr; + int name_len; + + name_len = strlen(name) + 1; + if (name_len > MAX_INST_NAME_LEN) + return -ENAMETOOLONG; + + ptr = kstrndup(name, name_len, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + fi_acc = to_fi_acc(fi); + fi_acc->name = ptr; + return 0; +} + +static void acc_free_inst(struct usb_function_instance *fi) +{ + struct acc_instance *fi_acc; + + fi_acc = to_fi_acc(fi); + kfree(fi_acc->name); + acc_cleanup(); +} + +static struct usb_function_instance *acc_alloc_inst(void) +{ + struct acc_instance *fi_acc; + struct acc_dev *dev; + int err; + + fi_acc = kzalloc(sizeof(*fi_acc), GFP_KERNEL); + if (!fi_acc) + return ERR_PTR(-ENOMEM); + fi_acc->func_inst.set_inst_name = acc_set_inst_name; + fi_acc->func_inst.free_func_inst = acc_free_inst; + + err = acc_setup(); + if (err) { + kfree(fi_acc); + pr_err("Error setting ACCESSORY\n"); + return ERR_PTR(err); + } + + config_group_init_type_name(&fi_acc->func_inst.group, + "", &acc_func_type); + dev = _acc_dev; + return &fi_acc->func_inst; +} + +static void acc_free(struct usb_function *f) +{ +/*NO-OP: no function specific resource allocation in mtp_alloc*/ +} + +int acc_ctrlrequest_configfs(struct usb_function *f, + const struct usb_ctrlrequest *ctrl) { + if (f->config != NULL && f->config->cdev != NULL) + return acc_ctrlrequest(f->config->cdev, ctrl); + else + return -1; +} + +static struct usb_function *acc_alloc(struct usb_function_instance *fi) +{ + struct acc_dev *dev = _acc_dev; + + pr_info("acc_alloc\n"); + + dev->function.name = "accessory"; + dev->function.strings = acc_strings, + dev->function.fs_descriptors = fs_acc_descs; + dev->function.hs_descriptors = hs_acc_descs; + dev->function.bind = acc_function_bind_configfs; + dev->function.unbind = acc_function_unbind; + dev->function.set_alt = acc_function_set_alt; + dev->function.disable = acc_function_disable; + dev->function.free_func = acc_free; + dev->function.setup = acc_ctrlrequest_configfs; + + return &dev->function; +} +DECLARE_USB_FUNCTION_INIT(accessory, acc_alloc_inst, acc_alloc); +MODULE_LICENSE("GPL"); -- GitLab From 3cad9bef9d7a4fe60412b0a44eb8ee8ed788a86e Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 30 Mar 2015 15:32:22 -0700 Subject: [PATCH 0853/1442] ANDROID: usb: gadget: Relocate f_accessory 3.18 kernel has reorganized drivers/usb/gadget directory. Moving accessory gadget driver from drivers/usb/gadget to drivers/usb/gadget/function Signed-off-by: Badhri Jagan Sridharan Change-Id: If73c6df0537c4b1f51338ed3b0db817e51f06b4a --- drivers/usb/gadget/Makefile | 3 --- drivers/usb/gadget/function/Makefile | 2 ++ drivers/usb/gadget/{ => function}/f_accessory.c | 0 3 files changed, 2 insertions(+), 3 deletions(-) rename drivers/usb/gadget/{ => function}/f_accessory.c (100%) diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile index 1b55d76ee049..598a67d6ba05 100644 --- a/drivers/usb/gadget/Makefile +++ b/drivers/usb/gadget/Makefile @@ -9,7 +9,4 @@ obj-$(CONFIG_USB_LIBCOMPOSITE) += libcomposite.o libcomposite-y := usbstring.o config.o epautoconf.o libcomposite-y += composite.o functions.o configfs.o u_f.o -usb_f_accessory-y := f_accessory.o -obj-$(CONFIG_USB_F_ACC) += usb_f_accessory.o - obj-$(CONFIG_USB_GADGET) += udc/ function/ legacy/ diff --git a/drivers/usb/gadget/function/Makefile b/drivers/usb/gadget/function/Makefile index 8e6824f91e93..78682d5e4dc7 100644 --- a/drivers/usb/gadget/function/Makefile +++ b/drivers/usb/gadget/function/Makefile @@ -52,3 +52,5 @@ usb_f_ptp-y := f_ptp.o obj-$(CONFIG_USB_F_PTP) += usb_f_ptp.o usb_f_audio_source-y := f_audio_source.o obj-$(CONFIG_USB_F_AUDIO_SRC) += usb_f_audio_source.o +usb_f_accessory-y := f_accessory.o +obj-$(CONFIG_USB_F_ACC) += usb_f_accessory.o diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c similarity index 100% rename from drivers/usb/gadget/f_accessory.c rename to drivers/usb/gadget/function/f_accessory.c -- GitLab From 9061f251d44cbc87acb82d976b5cd4a6ad0943b8 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 6 May 2015 13:40:15 -0700 Subject: [PATCH 0854/1442] ANDROID: usb: gadget: Do not disconnect unregistered dev configfs_composite_unbind sets the gadget data to null. Therefore, add check in disconnect function to make sure that cdev is not NULL. Prints a WARN message if the driver tries to redundantly disconnect a gadget. Signed-off-by: Badhri Jagan Sridharan Change-Id: I248cb7175d0dd9a51c18053dd39475d8b3284f6d --- drivers/usb/gadget/composite.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 32176f779861..b2f0c767f751 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1983,6 +1983,12 @@ void composite_disconnect(struct usb_gadget *gadget) struct usb_composite_dev *cdev = get_gadget_data(gadget); unsigned long flags; + if (cdev == NULL) { + WARN(1, "%s: Calling disconnect on a Gadget that is \ + not connected\n", __func__); + return; + } + /* REVISIT: should we have config and device level * disconnect callbacks? */ -- GitLab From 9150ea1e9f184d59f991813d685195752bfa3c0b Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 14 Jul 2015 15:46:11 -0700 Subject: [PATCH 0855/1442] ANDROID: usb:gadget:Add "state" attribute to android_device Added a device attribute to android_device to determine USB_GADGET's state Signed-off-by: Badhri Jagan Sridharan Change-Id: I17f8903120df96bf2f4bf441940b53a87b818230 --- drivers/usb/gadget/configfs.c | 66 ++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 3cff594aaeea..aff546da1da2 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1554,11 +1554,49 @@ static const struct usb_gadget_driver configfs_driver_template = { .match_existing_only = 1, }; +#ifdef CONFIG_USB_CONFIGFS_UEVENT +static ssize_t state_show(struct device *pdev, struct device_attribute *attr, + char *buf) +{ + struct gadget_info *dev = dev_get_drvdata(pdev); + struct usb_composite_dev *cdev; + char *state = "DISCONNECTED"; + unsigned long flags; + + if (!dev) + goto out; + + cdev = &dev->cdev; + + if (!cdev) + goto out; + + spin_lock_irqsave(&cdev->lock, flags); + if (cdev->config) + state = "CONFIGURED"; + else if (dev->connected) + state = "CONNECTED"; + spin_unlock_irqrestore(&cdev->lock, flags); +out: + return sprintf(buf, "%s\n", state); +} + +static DEVICE_ATTR(state, S_IRUGO, state_show, NULL); + +static struct device_attribute *android_usb_attributes[] = { + &dev_attr_state, + NULL +}; +#endif + static struct config_group *gadgets_make( struct config_group *group, const char *name) { struct gadget_info *gi; + struct device_attribute **attrs; + struct device_attribute *attr; + int err; gi = kzalloc(sizeof(*gi), GFP_KERNEL); if (!gi) @@ -1608,12 +1646,31 @@ static struct config_group *gadgets_make( MKDEV(0, 0), NULL, "android0"); if (IS_ERR(android_device)) goto err; + + dev_set_drvdata(android_device, gi); + + attrs = android_usb_attributes; + while ((attr = *attrs++)) { + err = device_create_file(android_device, attr); + if (err) + goto err1; + } #endif if (!gi->composite.gadget_driver.function) - goto err; + goto err1; return &gi->group; + +err1: +#ifdef CONFIG_USB_CONFIGFS_UEVENT + attrs = android_usb_attributes; + while ((attr = *attrs++)) + device_remove_file(android_device, attr); + + device_destroy(android_device->class, + android_device->devt); +#endif err: kfree(gi); return ERR_PTR(-ENOMEM); @@ -1621,8 +1678,15 @@ static struct config_group *gadgets_make( static void gadgets_drop(struct config_group *group, struct config_item *item) { + struct device_attribute **attrs; + struct device_attribute *attr; + config_item_put(item); + #ifdef CONFIG_USB_CONFIGFS_UEVENT + attrs = android_usb_attributes; + while ((attr = *attrs++)) + device_remove_file(android_device, attr); device_destroy(android_device->class, android_device->devt); #endif } -- GitLab From 171b8124fe40a7b37050425f0ab59c66a436d79f Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Sat, 1 Aug 2015 03:26:51 +0530 Subject: [PATCH 0856/1442] ANDROID: usb: gadget: fix NULL ptr derefer while symlinking PTP func Fix NULL pointer dereference while trying to link PTP function to a gadget configuration without creating MTP function. PTP piggyback on MTP function so make sure we have MTP function created beforehand. Otherwise we run into following kernel panic: ----------------------- [ 70.329957] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 70.330738] pgd = dd8ec000 [ 70.330916] [00000000] *pgd=00000000 [ 70.331663] Internal error: Oops: 805 [#1] SMP THUMB2 [ 70.332155] CPU: 0 PID: 2067 Comm: ln Not tainted 3.18.0-00587-gdfa582e #1 [ 70.332511] task: dd9c92c0 ti: dd822000 task.ti: dd822000 [ 70.333094] PC is at function_alloc_mtp_ptp+0xe/0x68 [ 70.333311] LR is at usb_get_function+0x11/0x1c [ 70.333489] pc : [] lr : [] psr: 60070033 <..snip..> [ 70.384111] 3fc0: bec14ae4 00000004 bec14c0a 00000053 00000004 b6f0422d 00000000 bec14adc [ 70.384369] 3fe0: bec14af8 bec14a98 b6f071f3 b6e8977c 20070010 bec14c0d 00000000 00000000 [ 70.384832] [] (function_alloc_mtp_ptp) from [] (usb_get_function+0x11/0x1c) [ 70.385146] [] (usb_get_function) from [] (config_usb_cfg_link+0x87/0xa8) [ 70.385421] [] (config_usb_cfg_link) from [] (configfs_symlink+0xb7/0x1c8) [ 70.385696] [] (configfs_symlink) from [] (vfs_symlink+0x85/0xc0) [ 70.386010] [] (vfs_symlink) from [] (SyS_symlinkat+0x43/0x70) [ 70.386261] [] (SyS_symlinkat) from [] (ret_fast_syscall+0x1/0x5c) [ 70.386610] Code: eb04 4a0f 6e03 480f (e883) 0005 [ 70.387346] ---[ end trace 8dba7c552e02f8fa ]--- [ 70.387647] Kernel panic - not syncing: Fatal exception [ 70.387980] ---[ end Kernel panic - not syncing: Fatal exception ----------------------- Steps to reproduce the kernel panic: mount -t configfs none /config mkdir /config/usb_gadget/g1 cd /config/usb_gadget/g1 echo 0x18d1 > idVendor echo 0x4e26 > idProduct mkdir strings/0x409 echo 0123459876 > strings/0x409/serialnumber echo Asus > strings/0x409/manufacturer echo Nexus7 > strings/0x409/product mkdir configs/c.1 mkdir configs/c.1/strings/0x409 echo "Conf 1" > configs/c.1/strings/0x409/configuration echo 120 > configs/c.1/MaxPower mkdir functions/ptp.ptp ln -s functions/ptp.ptp configs/c.1/ptp.ptp Also MTP and PTP are mutually exclusive functions so make sure we have only one of it linked to a configuration at a time. Otherwise it opens up another set of bug(s?). Signed-off-by: Amit Pundir --- drivers/usb/gadget/configfs.c | 5 +++++ drivers/usb/gadget/function/f_mtp.c | 18 +++++++++++++++++- drivers/usb/gadget/functions.c | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index aff546da1da2..39adb589bb83 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -426,6 +426,11 @@ static int config_usb_cfg_link( } f = usb_get_function(fi); + if (f == NULL) { + /* Are we trying to symlink PTP without MTP function? */ + ret = -EINVAL; /* Invalid Configuration */ + goto out; + } if (IS_ERR(f)) { ret = PTR_ERR(f); goto out; diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 82f6b2ebaebb..03a61f8b9d48 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -1426,8 +1426,24 @@ struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi, bool mtp_config) { struct mtp_instance *fi_mtp = to_fi_mtp(fi); - struct mtp_dev *dev = fi_mtp->dev; + struct mtp_dev *dev; + + /* + * PTP piggybacks on MTP function so make sure we have + * created MTP function before we associate this PTP + * function with a gadget configuration. + */ + if (fi_mtp->dev == NULL) { + pr_err("Error: Create MTP function before linking" + " PTP function with a gadget configuration\n"); + pr_err("\t1: Delete existing PTP function if any\n"); + pr_err("\t2: Create MTP function\n"); + pr_err("\t3: Create and symlink PTP function" + " with a gadget configuration\n"); + return NULL; + } + dev = fi_mtp->dev; dev->function.name = DRIVER_NAME; dev->function.strings = mtp_strings; if (mtp_config) { diff --git a/drivers/usb/gadget/functions.c b/drivers/usb/gadget/functions.c index b13f839e7368..389c1f3d0fee 100644 --- a/drivers/usb/gadget/functions.c +++ b/drivers/usb/gadget/functions.c @@ -58,7 +58,7 @@ struct usb_function *usb_get_function(struct usb_function_instance *fi) struct usb_function *f; f = fi->fd->alloc_func(fi); - if (IS_ERR(f)) + if ((f == NULL) || IS_ERR(f)) return f; f->fi = fi; return f; -- GitLab From 7dfee9ca8fc4b03867b6089db9b0e645e9ba164f Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 31 Aug 2015 21:36:07 -0700 Subject: [PATCH 0857/1442] ANDROID: usb: phy: Dual role sysfs class definition This CL adds a new class to monitor and change dual role usb ports from userspace. The usb phy drivers can register to the dual_role_usb class and expose the capabilities of the ports. The phy drivers can decide on whether a specific attribute can be changed from userspace by choosing to implement the appropriate callback. Cherry-picked from https://android-review.googlesource.com/#/c/167310/ Signed-off-by: Badhri Jagan Sridharan Bug: 21615151 Change-Id: Id1c4aaa97e898264d7006381a7badd029b5d9789 --- .../ABI/testing/sysfs-class-dual-role-usb | 71 +++ drivers/usb/phy/Kconfig | 9 + drivers/usb/phy/Makefile | 2 + drivers/usb/phy/class-dual-role.c | 529 ++++++++++++++++++ include/linux/usb/class-dual-role.h | 128 +++++ 5 files changed, 739 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-dual-role-usb create mode 100644 drivers/usb/phy/class-dual-role.c create mode 100644 include/linux/usb/class-dual-role.h diff --git a/Documentation/ABI/testing/sysfs-class-dual-role-usb b/Documentation/ABI/testing/sysfs-class-dual-role-usb new file mode 100644 index 000000000000..a900fd75430c --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-dual-role-usb @@ -0,0 +1,71 @@ +What: /sys/class/dual_role_usb/.../ +Date: June 2015 +Contact: Badhri Jagan Sridharan +Description: + Provide a generic interface to monitor and change + the state of dual role usb ports. The name here + refers to the name mentioned in the + dual_role_phy_desc that is passed while registering + the dual_role_phy_intstance through + devm_dual_role_instance_register. + +What: /sys/class/dual_role_usb/.../supported_modes +Date: June 2015 +Contact: Badhri Jagan Sridharan +Description: + This is a static node, once initialized this + is not expected to change during runtime. "dfp" + refers to "downstream facing port" i.e. port can + only act as host. "ufp" refers to "upstream + facing port" i.e. port can only act as device. + "dfp ufp" refers to "dual role port" i.e. the port + can either be a host port or a device port. + +What: /sys/class/dual_role_usb/.../mode +Date: June 2015 +Contact: Badhri Jagan Sridharan +Description: + The mode node refers to the current mode in which the + port is operating. "dfp" for host ports. "ufp" for device + ports and "none" when cable is not connected. + + On devices where the USB mode is software-controllable, + userspace can change the mode by writing "dfp" or "ufp". + On devices where the USB mode is fixed in hardware, + this attribute is read-only. + +What: /sys/class/dual_role_usb/.../power_role +Date: June 2015 +Contact: Badhri Jagan Sridharan +Description: + The power_role node mentions whether the port + is "sink"ing or "source"ing power. "none" if + they are not connected. + + On devices implementing USB Power Delivery, + userspace can control the power role by writing "sink" or + "source". On devices without USB-PD, this attribute is + read-only. + +What: /sys/class/dual_role_usb/.../data_role +Date: June 2015 +Contact: Badhri Jagan Sridharan +Description: + The data_role node mentions whether the port + is acting as "host" or "device" for USB data connection. + "none" if there is no active data link. + + On devices implementing USB Power Delivery, userspace + can control the data role by writing "host" or "device". + On devices without USB-PD, this attribute is read-only + +What: /sys/class/dual_role_usb/.../powers_vconn +Date: June 2015 +Contact: Badhri Jagan Sridharan +Description: + The powers_vconn node mentions whether the port + is supplying power for VCONN pin. + + On devices with software control of VCONN, + userspace can disable the power supply to VCONN by writing "n", + or enable the power supply by writing "y". diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 63bbfb4e1be6..aa5e9fc84642 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -216,4 +216,13 @@ config USB_ULPI_VIEWPORT Provides read/write operations to the ULPI phy register set for controllers with a viewport register (e.g. Chipidea/ARC controllers). +config DUAL_ROLE_USB_INTF + bool "Generic DUAL ROLE sysfs interface" + depends on SYSFS && USB_PHY + help + A generic sysfs interface to track and change the state of + dual role usb phys. The usb phy drivers can register to + this interface to expose it capabilities to the userspace + and thereby allowing userspace to change the port mode. + endmenu diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile index fee55847a89c..f65ac3e1fc07 100644 --- a/drivers/usb/phy/Makefile +++ b/drivers/usb/phy/Makefile @@ -4,6 +4,8 @@ obj-$(CONFIG_USB_PHY) += phy.o obj-$(CONFIG_OF) += of.o obj-$(CONFIG_USB_OTG_WAKELOCK) += otg-wakelock.o +obj-$(CONFIG_DUAL_ROLE_USB_INTF) += class-dual-role.o + # transceiver drivers, keep the list sorted obj-$(CONFIG_AB8500_USB) += phy-ab8500-usb.o diff --git a/drivers/usb/phy/class-dual-role.c b/drivers/usb/phy/class-dual-role.c new file mode 100644 index 000000000000..ce889dd529cb --- /dev/null +++ b/drivers/usb/phy/class-dual-role.c @@ -0,0 +1,529 @@ +/* + * class-dual-role.c + * + * Copyright (C) 2015 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DUAL_ROLE_NOTIFICATION_TIMEOUT 2000 + +static ssize_t dual_role_store_property(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count); +static ssize_t dual_role_show_property(struct device *dev, + struct device_attribute *attr, + char *buf); + +#define DUAL_ROLE_ATTR(_name) \ +{ \ + .attr = { .name = #_name }, \ + .show = dual_role_show_property, \ + .store = dual_role_store_property, \ +} + +static struct device_attribute dual_role_attrs[] = { + DUAL_ROLE_ATTR(supported_modes), + DUAL_ROLE_ATTR(mode), + DUAL_ROLE_ATTR(power_role), + DUAL_ROLE_ATTR(data_role), + DUAL_ROLE_ATTR(powers_vconn), +}; + +struct class *dual_role_class; +EXPORT_SYMBOL_GPL(dual_role_class); + +static struct device_type dual_role_dev_type; + +static char *kstrdupcase(const char *str, gfp_t gfp, bool to_upper) +{ + char *ret, *ustr; + + ustr = ret = kmalloc(strlen(str) + 1, gfp); + + if (!ret) + return NULL; + + while (*str) + *ustr++ = to_upper ? toupper(*str++) : tolower(*str++); + + *ustr = 0; + + return ret; +} + +static void dual_role_changed_work(struct work_struct *work) +{ + struct dual_role_phy_instance *dual_role = + container_of(work, struct dual_role_phy_instance, + changed_work); + + dev_dbg(&dual_role->dev, "%s\n", __func__); + kobject_uevent(&dual_role->dev.kobj, KOBJ_CHANGE); +} + +void dual_role_instance_changed(struct dual_role_phy_instance *dual_role) +{ + dev_dbg(&dual_role->dev, "%s\n", __func__); + pm_wakeup_event(&dual_role->dev, DUAL_ROLE_NOTIFICATION_TIMEOUT); + schedule_work(&dual_role->changed_work); +} +EXPORT_SYMBOL_GPL(dual_role_instance_changed) + +int dual_role_get_property(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + unsigned int *val) +{ + return dual_role->desc->get_property(dual_role, prop, val); +} +EXPORT_SYMBOL_GPL(dual_role_get_property); + +int dual_role_set_property(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + const unsigned int *val) +{ + if (!dual_role->desc->set_property) + return -ENODEV; + + return dual_role->desc->set_property(dual_role, prop, val); +} +EXPORT_SYMBOL_GPL(dual_role_set_property); + +int dual_role_property_is_writeable(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop) +{ + if (!dual_role->desc->property_is_writeable) + return -ENODEV; + + return dual_role->desc->property_is_writeable(dual_role, prop); +} +EXPORT_SYMBOL_GPL(dual_role_property_is_writeable); + +static void dual_role_dev_release(struct device *dev) +{ + struct dual_role_phy_instance *dual_role = + container_of(dev, struct dual_role_phy_instance, dev); + pr_debug("device: '%s': %s\n", dev_name(dev), __func__); + kfree(dual_role); +} + +static struct dual_role_phy_instance *__must_check +__dual_role_register(struct device *parent, + const struct dual_role_phy_desc *desc) +{ + struct device *dev; + struct dual_role_phy_instance *dual_role; + int rc; + + dual_role = kzalloc(sizeof(*dual_role), GFP_KERNEL); + if (!dual_role) + return ERR_PTR(-ENOMEM); + + dev = &dual_role->dev; + + device_initialize(dev); + + dev->class = dual_role_class; + dev->type = &dual_role_dev_type; + dev->parent = parent; + dev->release = dual_role_dev_release; + dev_set_drvdata(dev, dual_role); + dual_role->desc = desc; + + rc = dev_set_name(dev, "%s", desc->name); + if (rc) + goto dev_set_name_failed; + + INIT_WORK(&dual_role->changed_work, dual_role_changed_work); + + rc = device_init_wakeup(dev, true); + if (rc) + goto wakeup_init_failed; + + rc = device_add(dev); + if (rc) + goto device_add_failed; + + dual_role_instance_changed(dual_role); + + return dual_role; + +device_add_failed: + device_init_wakeup(dev, false); +wakeup_init_failed: +dev_set_name_failed: + put_device(dev); + kfree(dual_role); + + return ERR_PTR(rc); +} + +static void dual_role_instance_unregister(struct dual_role_phy_instance + *dual_role) +{ + cancel_work_sync(&dual_role->changed_work); + device_init_wakeup(&dual_role->dev, false); + device_unregister(&dual_role->dev); +} + +static void devm_dual_role_release(struct device *dev, void *res) +{ + struct dual_role_phy_instance **dual_role = res; + + dual_role_instance_unregister(*dual_role); +} + +struct dual_role_phy_instance *__must_check +devm_dual_role_instance_register(struct device *parent, + const struct dual_role_phy_desc *desc) +{ + struct dual_role_phy_instance **ptr, *dual_role; + + ptr = devres_alloc(devm_dual_role_release, sizeof(*ptr), GFP_KERNEL); + + if (!ptr) + return ERR_PTR(-ENOMEM); + dual_role = __dual_role_register(parent, desc); + if (IS_ERR(dual_role)) { + devres_free(ptr); + } else { + *ptr = dual_role; + devres_add(parent, ptr); + } + return dual_role; +} +EXPORT_SYMBOL_GPL(devm_dual_role_instance_register); + +static int devm_dual_role_match(struct device *dev, void *res, void *data) +{ + struct dual_role_phy_instance **r = res; + + if (WARN_ON(!r || !*r)) + return 0; + + return *r == data; +} + +void devm_dual_role_instance_unregister(struct device *dev, + struct dual_role_phy_instance + *dual_role) +{ + int rc; + + rc = devres_release(dev, devm_dual_role_release, + devm_dual_role_match, dual_role); + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_dual_role_instance_unregister); + +void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role) +{ + return dual_role->drv_data; +} +EXPORT_SYMBOL_GPL(dual_role_get_drvdata); + +/***************** Device attribute functions **************************/ + +/* port type */ +static char *supported_modes_text[] = { + "ufp dfp", "dfp", "ufp" +}; + +/* current mode */ +static char *mode_text[] = { + "ufp", "dfp", "none" +}; + +/* Power role */ +static char *pr_text[] = { + "source", "sink", "none" +}; + +/* Data role */ +static char *dr_text[] = { + "host", "device", "none" +}; + +/* Vconn supply */ +static char *vconn_supply_text[] = { + "n", "y" +}; + +static ssize_t dual_role_show_property(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev); + const ptrdiff_t off = attr - dual_role_attrs; + unsigned int value; + + if (off == DUAL_ROLE_PROP_SUPPORTED_MODES) { + value = dual_role->desc->supported_modes; + } else { + ret = dual_role_get_property(dual_role, off, &value); + + if (ret < 0) { + if (ret == -ENODATA) + dev_dbg(dev, + "driver has no data for `%s' property\n", + attr->attr.name); + else if (ret != -ENODEV) + dev_err(dev, + "driver failed to report `%s' property: %zd\n", + attr->attr.name, ret); + return ret; + } + } + + if (off == DUAL_ROLE_PROP_SUPPORTED_MODES) { + BUILD_BUG_ON(DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL != + ARRAY_SIZE(supported_modes_text)); + if (value < DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + supported_modes_text[value]); + else + return -EIO; + } else if (off == DUAL_ROLE_PROP_MODE) { + BUILD_BUG_ON(DUAL_ROLE_PROP_MODE_TOTAL != + ARRAY_SIZE(mode_text)); + if (value < DUAL_ROLE_PROP_MODE_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + mode_text[value]); + else + return -EIO; + } else if (off == DUAL_ROLE_PROP_PR) { + BUILD_BUG_ON(DUAL_ROLE_PROP_PR_TOTAL != ARRAY_SIZE(pr_text)); + if (value < DUAL_ROLE_PROP_PR_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + pr_text[value]); + else + return -EIO; + } else if (off == DUAL_ROLE_PROP_DR) { + BUILD_BUG_ON(DUAL_ROLE_PROP_DR_TOTAL != ARRAY_SIZE(dr_text)); + if (value < DUAL_ROLE_PROP_DR_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + dr_text[value]); + else + return -EIO; + } else if (off == DUAL_ROLE_PROP_VCONN_SUPPLY) { + BUILD_BUG_ON(DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL != + ARRAY_SIZE(vconn_supply_text)); + if (value < DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL) + return snprintf(buf, PAGE_SIZE, "%s\n", + vconn_supply_text[value]); + else + return -EIO; + } else + return -EIO; +} + +static ssize_t dual_role_store_property(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + ssize_t ret; + struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev); + const ptrdiff_t off = attr - dual_role_attrs; + unsigned int value; + int total, i; + char *dup_buf, **text_array; + bool result = false; + + dup_buf = kstrdupcase(buf, GFP_KERNEL, false); + switch (off) { + case DUAL_ROLE_PROP_MODE: + total = DUAL_ROLE_PROP_MODE_TOTAL; + text_array = mode_text; + break; + case DUAL_ROLE_PROP_PR: + total = DUAL_ROLE_PROP_PR_TOTAL; + text_array = pr_text; + break; + case DUAL_ROLE_PROP_DR: + total = DUAL_ROLE_PROP_DR_TOTAL; + text_array = dr_text; + break; + case DUAL_ROLE_PROP_VCONN_SUPPLY: + ret = strtobool(dup_buf, &result); + value = result; + if (!ret) + goto setprop; + default: + ret = -EINVAL; + goto error; + } + + for (i = 0; i <= total; i++) { + if (i == total) { + ret = -ENOTSUPP; + goto error; + } + if (!strncmp(*(text_array + i), dup_buf, + strlen(*(text_array + i)))) { + value = i; + break; + } + } + +setprop: + ret = dual_role->desc->set_property(dual_role, off, &value); + +error: + kfree(dup_buf); + + if (ret < 0) + return ret; + + return count; +} + +static umode_t dual_role_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int attrno) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev); + umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + int i; + + if (attrno == DUAL_ROLE_PROP_SUPPORTED_MODES) + return mode; + + for (i = 0; i < dual_role->desc->num_properties; i++) { + int property = dual_role->desc->properties[i]; + + if (property == attrno) { + if (dual_role->desc->property_is_writeable && + dual_role_property_is_writeable(dual_role, property) + > 0) + mode |= S_IWUSR; + + return mode; + } + } + + return 0; +} + +static struct attribute *__dual_role_attrs[ARRAY_SIZE(dual_role_attrs) + 1]; + +static struct attribute_group dual_role_attr_group = { + .attrs = __dual_role_attrs, + .is_visible = dual_role_attr_is_visible, +}; + +static const struct attribute_group *dual_role_attr_groups[] = { + &dual_role_attr_group, + NULL, +}; + +void dual_role_init_attrs(struct device_type *dev_type) +{ + int i; + + dev_type->groups = dual_role_attr_groups; + + for (i = 0; i < ARRAY_SIZE(dual_role_attrs); i++) + __dual_role_attrs[i] = &dual_role_attrs[i].attr; +} + +int dual_role_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev); + int ret = 0, j; + char *prop_buf; + char *attrname; + + dev_dbg(dev, "uevent\n"); + + if (!dual_role || !dual_role->desc) { + dev_dbg(dev, "No dual_role phy yet\n"); + return ret; + } + + dev_dbg(dev, "DUAL_ROLE_NAME=%s\n", dual_role->desc->name); + + ret = add_uevent_var(env, "DUAL_ROLE_NAME=%s", dual_role->desc->name); + if (ret) + return ret; + + prop_buf = (char *)get_zeroed_page(GFP_KERNEL); + if (!prop_buf) + return -ENOMEM; + + for (j = 0; j < dual_role->desc->num_properties; j++) { + struct device_attribute *attr; + char *line; + + attr = &dual_role_attrs[dual_role->desc->properties[j]]; + + ret = dual_role_show_property(dev, attr, prop_buf); + if (ret == -ENODEV || ret == -ENODATA) { + ret = 0; + continue; + } + + if (ret < 0) + goto out; + line = strnchr(prop_buf, PAGE_SIZE, '\n'); + if (line) + *line = 0; + + attrname = kstrdupcase(attr->attr.name, GFP_KERNEL, true); + if (!attrname) + ret = -ENOMEM; + + dev_dbg(dev, "prop %s=%s\n", attrname, prop_buf); + + ret = add_uevent_var(env, "DUAL_ROLE_%s=%s", attrname, + prop_buf); + kfree(attrname); + if (ret) + goto out; + } + +out: + free_page((unsigned long)prop_buf); + + return ret; +} + +/******************* Module Init ***********************************/ + +static int __init dual_role_class_init(void) +{ + dual_role_class = class_create(THIS_MODULE, "dual_role_usb"); + + if (IS_ERR(dual_role_class)) + return PTR_ERR(dual_role_class); + + dual_role_class->dev_uevent = dual_role_uevent; + dual_role_init_attrs(&dual_role_dev_type); + + return 0; +} + +static void __exit dual_role_class_exit(void) +{ + class_destroy(dual_role_class); +} + +subsys_initcall(dual_role_class_init); +module_exit(dual_role_class_exit); diff --git a/include/linux/usb/class-dual-role.h b/include/linux/usb/class-dual-role.h new file mode 100644 index 000000000000..af42ed34944a --- /dev/null +++ b/include/linux/usb/class-dual-role.h @@ -0,0 +1,128 @@ +#ifndef __LINUX_CLASS_DUAL_ROLE_H__ +#define __LINUX_CLASS_DUAL_ROLE_H__ + +#include +#include +#include + +struct device; + +enum dual_role_supported_modes { + DUAL_ROLE_SUPPORTED_MODES_DFP_AND_UFP = 0, + DUAL_ROLE_SUPPORTED_MODES_DFP, + DUAL_ROLE_SUPPORTED_MODES_UFP, +/*The following should be the last element*/ + DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL, +}; + +enum { + DUAL_ROLE_PROP_MODE_UFP = 0, + DUAL_ROLE_PROP_MODE_DFP, + DUAL_ROLE_PROP_MODE_NONE, +/*The following should be the last element*/ + DUAL_ROLE_PROP_MODE_TOTAL, +}; + +enum { + DUAL_ROLE_PROP_PR_SRC = 0, + DUAL_ROLE_PROP_PR_SNK, + DUAL_ROLE_PROP_PR_NONE, +/*The following should be the last element*/ + DUAL_ROLE_PROP_PR_TOTAL, + +}; + +enum { + DUAL_ROLE_PROP_DR_HOST = 0, + DUAL_ROLE_PROP_DR_DEVICE, + DUAL_ROLE_PROP_DR_NONE, +/*The following should be the last element*/ + DUAL_ROLE_PROP_DR_TOTAL, +}; + +enum { + DUAL_ROLE_PROP_VCONN_SUPPLY_NO = 0, + DUAL_ROLE_PROP_VCONN_SUPPLY_YES, +/*The following should be the last element*/ + DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL, +}; + +enum dual_role_property { + DUAL_ROLE_PROP_SUPPORTED_MODES = 0, + DUAL_ROLE_PROP_MODE, + DUAL_ROLE_PROP_PR, + DUAL_ROLE_PROP_DR, + DUAL_ROLE_PROP_VCONN_SUPPLY, +}; + +struct dual_role_phy_instance; + +/* Description of typec port */ +struct dual_role_phy_desc { + /* /sys/class/dual_role_usb// */ + const char *name; + enum dual_role_supported_modes supported_modes; + enum dual_role_property *properties; + size_t num_properties; + + /* Callback for "cat /sys/class/dual_role_usb//" */ + int (*get_property)(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + unsigned int *val); + /* Callback for "echo > + * /sys/class/dual_role_usb//" */ + int (*set_property)(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + const unsigned int *val); + /* Decides whether userspace can change a specific property */ + int (*property_is_writeable)(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop); +}; + +struct dual_role_phy_instance { + const struct dual_role_phy_desc *desc; + + /* Driver private data */ + void *drv_data; + + struct device dev; + struct work_struct changed_work; +}; + +#if IS_ENABLED(CONFIG_DUAL_ROLE_USB_INTF) +extern void dual_role_instance_changed(struct dual_role_phy_instance + *dual_role); +extern struct dual_role_phy_instance *__must_check +devm_dual_role_instance_register(struct device *parent, + const struct dual_role_phy_desc *desc); +extern void devm_dual_role_instance_unregister(struct device *dev, + struct dual_role_phy_instance + *dual_role); +extern int dual_role_get_property(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + unsigned int *val); +extern int dual_role_set_property(struct dual_role_phy_instance *dual_role, + enum dual_role_property prop, + const unsigned int *val); +extern int dual_role_property_is_writeable(struct dual_role_phy_instance + *dual_role, + enum dual_role_property prop); +extern void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role); +#else /* CONFIG_DUAL_ROLE_USB_INTF */ +static void dual_role_instance_changed(struct dual_role_phy_instance + *dual_role){} +static struct dual_role_phy_instance *__must_check +devm_dual_role_instance_register(struct device *parent, + const struct dual_role_phy_desc *desc) +{ + return ERR_PTR(-ENOSYS); +} +static void devm_dual_role_instance_unregister(struct device *dev, + struct dual_role_phy_instance + *dual_role){} +static void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role) +{ + return ERR_PTR(-ENOSYS); +} +#endif /* CONFIG_DUAL_ROLE_USB_INTF */ +#endif /* __LINUX_CLASS_DUAL_ROLE_H__ */ -- GitLab From 9a9b1687f974a5a3a3c6ad4768c005d783404407 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Wed, 2 Sep 2015 16:38:31 +0530 Subject: [PATCH 0858/1442] ANDROID: usb: phy: fix dual role sysfs build if kernel modules are supported Add a missing ";" after EXPORT_SYMBOL() otherwise we run into following build error if Kernel Modules are supported: ---------- CC drivers/usb/phy/class-dual-role.o drivers/usb/phy/class-dual-role.c:91:1: error: expected ',' or ';' before 'int' int dual_role_get_property(struct dual_role_phy_instance *dual_role, ^ make[3]: *** [drivers/usb/phy/class-dual-role.o] Error 1 ---------- Signed-off-by: Amit Pundir --- drivers/usb/phy/class-dual-role.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/class-dual-role.c b/drivers/usb/phy/class-dual-role.c index ce889dd529cb..51fcb545a9d5 100644 --- a/drivers/usb/phy/class-dual-role.c +++ b/drivers/usb/phy/class-dual-role.c @@ -86,7 +86,7 @@ void dual_role_instance_changed(struct dual_role_phy_instance *dual_role) pm_wakeup_event(&dual_role->dev, DUAL_ROLE_NOTIFICATION_TIMEOUT); schedule_work(&dual_role->changed_work); } -EXPORT_SYMBOL_GPL(dual_role_instance_changed) +EXPORT_SYMBOL_GPL(dual_role_instance_changed); int dual_role_get_property(struct dual_role_phy_instance *dual_role, enum dual_role_property prop, -- GitLab From 560f013fa069516a3ffa7a9ef76b94f933370205 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sun, 9 Aug 2015 15:12:50 -0700 Subject: [PATCH 0859/1442] ANDROID: usb: gadget: Add device attribute to determine gadget state Android frameworks (UsbDeviceManager) relies on gadget state exported through device attributes. This CL adds the device attribute to export USB gadget state. Change-Id: Id0391810d75b58c579610fbec6e37ab22f28886d Signed-off-by: Badhri Jagan Sridharan --- drivers/usb/gadget/configfs.c | 85 +++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 33 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 39adb589bb83..2f25b2bdbff7 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1592,6 +1592,54 @@ static struct device_attribute *android_usb_attributes[] = { &dev_attr_state, NULL }; + +static int android_device_create(struct gadget_info *gi) +{ + struct device_attribute **attrs; + struct device_attribute *attr; + + INIT_WORK(&gi->work, android_work); + android_device = device_create(android_class, NULL, + MKDEV(0, 0), NULL, "android0"); + if (IS_ERR(android_device)) + return PTR_ERR(android_device); + + dev_set_drvdata(android_device, gi); + + attrs = android_usb_attributes; + while ((attr = *attrs++)) { + int err; + + err = device_create_file(android_device, attr); + if (err) { + device_destroy(android_device->class, + android_device->devt); + return err; + } + } + + return 0; +} + +static void android_device_destroy(void) +{ + struct device_attribute **attrs; + struct device_attribute *attr; + + attrs = android_usb_attributes; + while ((attr = *attrs++)) + device_remove_file(android_device, attr); + device_destroy(android_device->class, android_device->devt); +} +#else +static inline int android_device_create(struct gadget_info *gi) +{ + return 0; +} + +static inline void android_device_destroy(void) +{ +} #endif static struct config_group *gadgets_make( @@ -1645,37 +1693,14 @@ static struct config_group *gadgets_make( gi->composite.gadget_driver.function = kstrdup(name, GFP_KERNEL); gi->composite.name = gi->composite.gadget_driver.function; -#ifdef CONFIG_USB_CONFIGFS_UEVENT - INIT_WORK(&gi->work, android_work); - android_device = device_create(android_class, NULL, - MKDEV(0, 0), NULL, "android0"); - if (IS_ERR(android_device)) + if (!gi->composite.gadget_driver.function) goto err; - dev_set_drvdata(android_device, gi); - - attrs = android_usb_attributes; - while ((attr = *attrs++)) { - err = device_create_file(android_device, attr); - if (err) - goto err1; - } -#endif - - if (!gi->composite.gadget_driver.function) - goto err1; + if (android_device_create(gi) < 0) + goto err; return &gi->group; -err1: -#ifdef CONFIG_USB_CONFIGFS_UEVENT - attrs = android_usb_attributes; - while ((attr = *attrs++)) - device_remove_file(android_device, attr); - - device_destroy(android_device->class, - android_device->devt); -#endif err: kfree(gi); return ERR_PTR(-ENOMEM); @@ -1687,13 +1712,7 @@ static void gadgets_drop(struct config_group *group, struct config_item *item) struct device_attribute *attr; config_item_put(item); - -#ifdef CONFIG_USB_CONFIGFS_UEVENT - attrs = android_usb_attributes; - while ((attr = *attrs++)) - device_remove_file(android_device, attr); - device_destroy(android_device->class, android_device->devt); -#endif + android_device_destroy(); } static struct configfs_group_operations gadgets_ops = { -- GitLab From bba0df3964cafc3e6834e8c97a212e3546d26b40 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 2 Sep 2015 22:49:10 -0700 Subject: [PATCH 0860/1442] ANDROID: usb: gadget: create F_midi device Android frameworks relies on the alsa config reported by the f_midi device. Signed-off-by: Badhri Jagan Sridharan Change-Id: I0695e00b166fd953f50acea93802245b0d5a5240 --- drivers/usb/gadget/function/f_midi.c | 65 ++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index a5719f271bf0..f23a6f7cd347 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -1167,6 +1167,65 @@ static void f_midi_free_inst(struct usb_function_instance *f) kfree(opts); } +#ifdef CONFIG_USB_CONFIGFS_UEVENT +extern struct device *create_function_device(char *name); +static ssize_t alsa_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct usb_function_instance *fi_midi = dev_get_drvdata(dev); + struct f_midi *midi; + + if (!fi_midi->f) + dev_warn(dev, "f_midi: function not set\n"); + + if (fi_midi && fi_midi->f) { + midi = func_to_midi(fi_midi->f); + if (midi->rmidi && midi->rmidi->card) + return sprintf(buf, "%d %d\n", + midi->rmidi->card->number, midi->rmidi->device); + } + + /* print PCM card and device numbers */ + return sprintf(buf, "%d %d\n", -1, -1); +} + +static DEVICE_ATTR(alsa, S_IRUGO, alsa_show, NULL); + +static struct device_attribute *alsa_function_attributes[] = { + &dev_attr_alsa, + NULL +}; + +static int create_alsa_device(struct usb_function_instance *fi) +{ + struct device *dev; + struct device_attribute **attrs; + struct device_attribute *attr; + int err = 0; + + dev = create_function_device("f_midi"); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + attrs = alsa_function_attributes; + if (attrs) { + while ((attr = *attrs++) && !err) + err = device_create_file(dev, attr); + if (err) { + device_destroy(dev->class, dev->devt); + return -EINVAL; + } + } + dev_set_drvdata(dev, fi); + return 0; +} +#else +static int create_alsa_device(struct usb_function_instance *fi) +{ + return 0; +} +#endif + static struct usb_function_instance *f_midi_alloc_inst(void) { struct f_midi_opts *opts; @@ -1184,6 +1243,11 @@ static struct usb_function_instance *f_midi_alloc_inst(void) opts->in_ports = 1; opts->out_ports = 1; + if (create_alsa_device(&opts->func_inst)) { + kfree(opts); + return ERR_PTR(-ENODEV); + } + config_group_init_type_name(&opts->func_inst.group, "", &midi_func_type); @@ -1280,6 +1344,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) midi->func.disable = f_midi_disable; midi->func.free_func = f_midi_free; + fi->f = &midi->func; return &midi->func; setup_fail: -- GitLab From 9214c899f7304cae8410f67af8eeb04776bf1349 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 6 Oct 2015 20:53:27 +0530 Subject: [PATCH 0861/1442] ANDROID: usb: gadget: configfs: handle gadget reset request for android There is this new mandatory UDC->reset API in v3.18+ kernels, commit ef979a26 "usb: gadget: add reset API at usb_gadget_driver". Let android_disconnect handle that for Android, similar to how composite_disconnect is handling the generic ConfigFS gadget reset request. Signed-off-by: Amit Pundir --- drivers/usb/gadget/configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 2f25b2bdbff7..bc1c1415861e 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1542,6 +1542,7 @@ static const struct usb_gadget_driver configfs_driver_template = { .unbind = configfs_composite_unbind, #ifdef CONFIG_USB_CONFIGFS_UEVENT .setup = android_setup, + .reset = android_disconnect, .disconnect = android_disconnect, #else .setup = composite_setup, -- GitLab From 95106e3d2071e73d2ea76b48e1d0a58617f18964 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 30 Oct 2015 03:00:20 +0530 Subject: [PATCH 0862/1442] ANDROID: usb: gadget: build audio_source function only if SND is enabled Also select SND_PCM while building f_audio_source otherwise we run into following build error: LD init/built-in.o drivers/built-in.o: In function `audio_data_complete': /linaro/android/kernel/linaro-android/drivers/usb/gadget/function/f_audio_source.c:458: undefined reference to `snd_pcm_period_elapsed' drivers/built-in.o: In function `audio_pcm_hw_free': /linaro/android/kernel/linaro-android/drivers/usb/gadget/function/f_audio_source.c:770: undefined reference to `snd_pcm_lib_free_vmalloc_buffer' drivers/built-in.o: In function `snd_pcm_lib_alloc_vmalloc_buffer': /linaro/android/kernel/linaro-android/include/sound/pcm.h:1179: undefined reference to `_snd_pcm_lib_alloc_vmalloc_buffer' drivers/built-in.o: In function `audio_pcm_open': /linaro/android/kernel/linaro-android/drivers/usb/gadget/function/f_audio_source.c:734: undefined reference to `snd_pcm_limit_hw_rates' drivers/built-in.o: In function `snd_card_setup': /linaro/android/kernel/linaro-android/drivers/usb/gadget/function/f_audio_source.c:888: undefined reference to `snd_pcm_new' /linaro/android/kernel/linaro-android/drivers/usb/gadget/function/f_audio_source.c:898: undefined reference to `snd_pcm_set_ops' /linaro/android/kernel/linaro-android/drivers/usb/gadget/function/f_audio_source.c:899: undefined reference to `snd_pcm_lib_preallocate_pages_for_all' drivers/built-in.o:(.data+0x1fd28): undefined reference to `snd_pcm_lib_ioctl' make: *** [vmlinux] Error 1 Signed-off-by: Amit Pundir --- drivers/usb/gadget/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7e7402af6edc..3684775677eb 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -398,6 +398,8 @@ config USB_CONFIGFS_F_ACC config USB_CONFIGFS_F_AUDIO_SRC boolean "Audio Source gadget" depends on USB_CONFIGFS && USB_CONFIGFS_F_ACC + depends on SND + select SND_PCM select USB_F_AUDIO_SRC help USB gadget Audio Source support -- GitLab From e45c769fa7af7cc4e5d55b8eafd24a65fee32116 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 25 Aug 2015 13:09:31 +0530 Subject: [PATCH 0863/1442] ANDROID: usb: gadget: cleanup: fix unused variable and function warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused variables and functions to fix following build warnings: CC drivers/usb/gadget/configfs.o drivers/usb/gadget/configfs.c: In function ‘gadgets_make’: drivers/usb/gadget/configfs.c:1710:6: warning: unused variable ‘err’ [-Wunused-variable] int err; ^ drivers/usb/gadget/configfs.c:1709:27: warning: unused variable ‘attr’ [-Wunused-variable] struct device_attribute *attr; ^ drivers/usb/gadget/configfs.c:1708:28: warning: unused variable ‘attrs’ [-Wunused-variable] struct device_attribute **attrs; ^ drivers/usb/gadget/configfs.c: In function ‘gadgets_drop’: drivers/usb/gadget/configfs.c:1774:27: warning: unused variable ‘attr’ [-Wunused-variable] struct device_attribute *attr; ^ drivers/usb/gadget/configfs.c:1773:28: warning: unused variable ‘attrs’ [-Wunused-variable] struct device_attribute **attrs; ^ ... CC drivers/usb/gadget/function/f_mtp.o drivers/usb/gadget/function/f_mtp.c:1219:12: warning: ‘mtp_bind_config’ defined but not used [-Wunused-function] static int mtp_bind_config(struct usb_configuration *c, bool ptp_config) ^ drivers/usb/gadget/function/f_mtp.c:1300:12: warning: ‘mtp_setup’ defined but not used [-Wunused-function] static int mtp_setup(void) ^ ... CC drivers/usb/gadget/function/f_accessory.o drivers/usb/gadget/function/f_accessory.c:969:1: warning: ‘acc_function_bind’ defined but not used [-Wunused-function] acc_function_bind(struct usb_configuration *c, struct usb_function *f) { ^ drivers/usb/gadget/function/f_accessory.c:1172:12: warning: ‘acc_bind_config’ defined but not used [-Wunused-function] static int acc_bind_config(struct usb_configuration *c) ^ Signed-off-by: Amit Pundir --- drivers/usb/gadget/configfs.c | 6 ---- drivers/usb/gadget/function/f_accessory.c | 34 -------------------- drivers/usb/gadget/function/f_mtp.c | 39 ----------------------- 3 files changed, 79 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index bc1c1415861e..44746a4fff1d 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1648,9 +1648,6 @@ static struct config_group *gadgets_make( const char *name) { struct gadget_info *gi; - struct device_attribute **attrs; - struct device_attribute *attr; - int err; gi = kzalloc(sizeof(*gi), GFP_KERNEL); if (!gi) @@ -1709,9 +1706,6 @@ static struct config_group *gadgets_make( static void gadgets_drop(struct config_group *group, struct config_item *item) { - struct device_attribute **attrs; - struct device_attribute *attr; - config_item_put(item); android_device_destroy(); } diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index d9db5c595b99..07732395eecb 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -943,11 +943,6 @@ __acc_function_bind(struct usb_configuration *c, return 0; } -static int -acc_function_bind(struct usb_configuration *c, struct usb_function *f) { - return __acc_function_bind(c, f, false); -} - static int acc_function_bind_configfs(struct usb_configuration *c, struct usb_function *f) { @@ -1147,35 +1142,6 @@ static void acc_function_disable(struct usb_function *f) VDBG(cdev, "%s disabled\n", dev->function.name); } -static int acc_bind_config(struct usb_configuration *c) -{ - struct acc_dev *dev = _acc_dev; - int ret; - - printk(KERN_INFO "acc_bind_config\n"); - - /* allocate a string ID for our interface */ - if (acc_string_defs[INTERFACE_STRING_INDEX].id == 0) { - ret = usb_string_id(c->cdev); - if (ret < 0) - return ret; - acc_string_defs[INTERFACE_STRING_INDEX].id = ret; - acc_interface_desc.iInterface = ret; - } - - dev->cdev = c->cdev; - dev->function.name = "accessory"; - dev->function.strings = acc_strings, - dev->function.fs_descriptors = fs_acc_descs; - dev->function.hs_descriptors = hs_acc_descs; - dev->function.bind = acc_function_bind; - dev->function.unbind = acc_function_unbind; - dev->function.set_alt = acc_function_set_alt; - dev->function.disable = acc_function_disable; - - return usb_add_function(c, &dev->function); -} - static int acc_setup(void) { struct acc_dev *dev; diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 03a61f8b9d48..aec7b8d61fe7 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -1216,40 +1216,6 @@ static void mtp_function_disable(struct usb_function *f) VDBG(cdev, "%s disabled\n", dev->function.name); } -static int mtp_bind_config(struct usb_configuration *c, bool ptp_config) -{ - struct mtp_dev *dev = _mtp_dev; - int ret = 0; - - printk(KERN_INFO "mtp_bind_config\n"); - - /* allocate a string ID for our interface */ - if (mtp_string_defs[INTERFACE_STRING_INDEX].id == 0) { - ret = usb_string_id(c->cdev); - if (ret < 0) - return ret; - mtp_string_defs[INTERFACE_STRING_INDEX].id = ret; - mtp_interface_desc.iInterface = ret; - } - - dev->cdev = c->cdev; - dev->function.name = DRIVER_NAME; - dev->function.strings = mtp_strings; - if (ptp_config) { - dev->function.fs_descriptors = fs_ptp_descs; - dev->function.hs_descriptors = hs_ptp_descs; - } else { - dev->function.fs_descriptors = fs_mtp_descs; - dev->function.hs_descriptors = hs_mtp_descs; - } - dev->function.bind = mtp_function_bind; - dev->function.unbind = mtp_function_unbind; - dev->function.set_alt = mtp_function_set_alt; - dev->function.disable = mtp_function_disable; - - return usb_add_function(c, &dev->function); -} - static int __mtp_setup(struct mtp_instance *fi_mtp) { struct mtp_dev *dev; @@ -1297,11 +1263,6 @@ static int __mtp_setup(struct mtp_instance *fi_mtp) return ret; } -static int mtp_setup(void) -{ - return __mtp_setup(NULL); -} - static int mtp_setup_configfs(struct mtp_instance *fi_mtp) { return __mtp_setup(fi_mtp); -- GitLab From 6427ac4392362500d832713fa6090d5b12702398 Mon Sep 17 00:00:00 2001 From: keunyoung Date: Wed, 29 Jan 2014 12:41:50 -0800 Subject: [PATCH 0864/1442] ANDROID: fix false disconnect due to a signal sent to the reading process - In the current implementation, when a signal is sent to the reading process, read is cancelled by calling usb_ep_dequeue, which lead into calling acc_complete_out with ECONNRESET, but the current logic treats it as disconnection, which makes the device inaccessible until cable is actually disconnected. - The fix calls disconnect only when ESHUTDOWN error is passed. - If data has already arrived while trying cancelling, the data is marked as available, and it will be read out on the next read. This is necessary as USB bulk is assumed to guarantee no data loss. Signed-off-by: keunyoung --- drivers/usb/gadget/function/f_accessory.c | 32 +++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index 07732395eecb..1be93a7ca4a1 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -270,8 +270,10 @@ static void acc_complete_in(struct usb_ep *ep, struct usb_request *req) { struct acc_dev *dev = _acc_dev; - if (req->status != 0) + if (req->status == -ESHUTDOWN) { + pr_debug("acc_complete_in set disconnected"); acc_set_disconnected(dev); + } req_put(dev, &dev->tx_idle, req); @@ -283,8 +285,10 @@ static void acc_complete_out(struct usb_ep *ep, struct usb_request *req) struct acc_dev *dev = _acc_dev; dev->rx_done = 1; - if (req->status != 0) + if (req->status == -ESHUTDOWN) { + pr_debug("acc_complete_out set disconnected"); acc_set_disconnected(dev); + } wake_up(&dev->read_wq); } @@ -567,8 +571,10 @@ static ssize_t acc_read(struct file *fp, char __user *buf, pr_debug("acc_read(%zu)\n", count); - if (dev->disconnected) + if (dev->disconnected) { + pr_debug("acc_read disconnected"); return -ENODEV; + } if (count > BULK_BUFFER_SIZE) count = BULK_BUFFER_SIZE; @@ -581,6 +587,12 @@ static ssize_t acc_read(struct file *fp, char __user *buf, goto done; } + if (dev->rx_done) { + // last req cancelled. try to get it. + req = dev->rx_req[0]; + goto copy_data; + } + requeue_req: /* queue a request */ req = dev->rx_req[0]; @@ -598,9 +610,17 @@ static ssize_t acc_read(struct file *fp, char __user *buf, ret = wait_event_interruptible(dev->read_wq, dev->rx_done); if (ret < 0) { r = ret; - usb_ep_dequeue(dev->ep_out, req); + ret = usb_ep_dequeue(dev->ep_out, req); + if (ret != 0) { + // cancel failed. There can be a data already received. + // it will be retrieved in the next read. + pr_debug("acc_read: cancelling failed %d", ret); + } goto done; } + +copy_data: + dev->rx_done = 0; if (dev->online) { /* If we got a 0-len packet, throw it back and try again. */ if (req->actual == 0) @@ -630,8 +650,10 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, pr_debug("acc_write(%zu)\n", count); - if (!dev->online || dev->disconnected) + if (!dev->online || dev->disconnected) { + pr_debug("acc_write disconnected or not online"); return -ENODEV; + } while (count > 0) { if (!dev->online) { -- GitLab From 8a05267650b44231c1193fdd41bd1cdf22d82309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 27 Sep 2010 17:50:00 -0700 Subject: [PATCH 0865/1442] ANDROID: ARM: Add fiq_glue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I27d2554e07d9de204e0a06696d38db51608d9f6b Signed-off-by: Arve Hjønnevåg Signed-off-by: Colin Cross --- arch/arm/common/Kconfig | 4 ++ arch/arm/common/Makefile | 1 + arch/arm/common/fiq_glue.S | 111 +++++++++++++++++++++++++++++++ arch/arm/common/fiq_glue_setup.c | 100 ++++++++++++++++++++++++++++ arch/arm/include/asm/fiq_glue.h | 30 +++++++++ 5 files changed, 246 insertions(+) create mode 100644 arch/arm/common/fiq_glue.S create mode 100644 arch/arm/common/fiq_glue_setup.c create mode 100644 arch/arm/include/asm/fiq_glue.h diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index 9353184d730d..ce01364a96e3 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -17,3 +17,7 @@ config SHARP_PARAM config SHARP_SCOOP bool + +config FIQ_GLUE + bool + select FIQ diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 27f23b15b1ea..04aca896b338 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -4,6 +4,7 @@ obj-y += firmware.o +obj-$(CONFIG_FIQ_GLUE) += fiq_glue.o fiq_glue_setup.o obj-$(CONFIG_ICST) += icst.o obj-$(CONFIG_SA1111) += sa1111.o obj-$(CONFIG_DMABOUNCE) += dmabounce.o diff --git a/arch/arm/common/fiq_glue.S b/arch/arm/common/fiq_glue.S new file mode 100644 index 000000000000..9e3455a09f8f --- /dev/null +++ b/arch/arm/common/fiq_glue.S @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + + .text + + .global fiq_glue_end + + /* fiq stack: r0-r15,cpsr,spsr of interrupted mode */ + +ENTRY(fiq_glue) + /* store pc, cpsr from previous mode */ + mrs r12, spsr + sub r11, lr, #4 + subs r10, #1 + bne nested_fiq + + stmfd sp!, {r11-r12, lr} + + /* store r8-r14 from previous mode */ + sub sp, sp, #(7 * 4) + stmia sp, {r8-r14}^ + nop + + /* store r0-r7 from previous mode */ + stmfd sp!, {r0-r7} + + /* setup func(data,regs) arguments */ + mov r0, r9 + mov r1, sp + mov r3, r8 + + mov r7, sp + + /* Get sp and lr from non-user modes */ + and r4, r12, #MODE_MASK + cmp r4, #USR_MODE + beq fiq_from_usr_mode + + mov r7, sp + orr r4, r4, #(PSR_I_BIT | PSR_F_BIT) + msr cpsr_c, r4 + str sp, [r7, #(4 * 13)] + str lr, [r7, #(4 * 14)] + mrs r5, spsr + str r5, [r7, #(4 * 17)] + + cmp r4, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT) + /* use fiq stack if we reenter this mode */ + subne sp, r7, #(4 * 3) + +fiq_from_usr_mode: + msr cpsr_c, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT) + mov r2, sp + sub sp, r7, #12 + stmfd sp!, {r2, ip, lr} + /* call func(data,regs) */ + blx r3 + ldmfd sp, {r2, ip, lr} + mov sp, r2 + + /* restore/discard saved state */ + cmp r4, #USR_MODE + beq fiq_from_usr_mode_exit + + msr cpsr_c, r4 + ldr sp, [r7, #(4 * 13)] + ldr lr, [r7, #(4 * 14)] + msr spsr_cxsf, r5 + +fiq_from_usr_mode_exit: + msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT) + + ldmfd sp!, {r0-r7} + add sp, sp, #(7 * 4) + ldmfd sp!, {r11-r12, lr} +exit_fiq: + msr spsr_cxsf, r12 + add r10, #1 + movs pc, r11 + +nested_fiq: + orr r12, r12, #(PSR_F_BIT) + b exit_fiq + +fiq_glue_end: + +ENTRY(fiq_glue_setup) /* func, data, sp */ + mrs r3, cpsr + msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT) + movs r8, r0 + mov r9, r1 + mov sp, r2 + moveq r10, #0 + movne r10, #1 + msr cpsr_c, r3 + bx lr + diff --git a/arch/arm/common/fiq_glue_setup.c b/arch/arm/common/fiq_glue_setup.c new file mode 100644 index 000000000000..4044c7db95c8 --- /dev/null +++ b/arch/arm/common/fiq_glue_setup.c @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include + +extern unsigned char fiq_glue, fiq_glue_end; +extern void fiq_glue_setup(void *func, void *data, void *sp); + +static struct fiq_handler fiq_debbuger_fiq_handler = { + .name = "fiq_glue", +}; +DEFINE_PER_CPU(void *, fiq_stack); +static struct fiq_glue_handler *current_handler; +static DEFINE_MUTEX(fiq_glue_lock); + +static void fiq_glue_setup_helper(void *info) +{ + struct fiq_glue_handler *handler = info; + fiq_glue_setup(handler->fiq, handler, + __get_cpu_var(fiq_stack) + THREAD_START_SP); +} + +int fiq_glue_register_handler(struct fiq_glue_handler *handler) +{ + int ret; + int cpu; + + if (!handler || !handler->fiq) + return -EINVAL; + + mutex_lock(&fiq_glue_lock); + if (fiq_stack) { + ret = -EBUSY; + goto err_busy; + } + + for_each_possible_cpu(cpu) { + void *stack; + stack = (void *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); + if (WARN_ON(!stack)) { + ret = -ENOMEM; + goto err_alloc_fiq_stack; + } + per_cpu(fiq_stack, cpu) = stack; + } + + ret = claim_fiq(&fiq_debbuger_fiq_handler); + if (WARN_ON(ret)) + goto err_claim_fiq; + + current_handler = handler; + on_each_cpu(fiq_glue_setup_helper, handler, true); + set_fiq_handler(&fiq_glue, &fiq_glue_end - &fiq_glue); + + mutex_unlock(&fiq_glue_lock); + return 0; + +err_claim_fiq: +err_alloc_fiq_stack: + for_each_possible_cpu(cpu) { + __free_pages(per_cpu(fiq_stack, cpu), THREAD_SIZE_ORDER); + per_cpu(fiq_stack, cpu) = NULL; + } +err_busy: + mutex_unlock(&fiq_glue_lock); + return ret; +} + +/** + * fiq_glue_resume - Restore fiqs after suspend or low power idle states + * + * This must be called before calling local_fiq_enable after returning from a + * power state where the fiq mode registers were lost. If a driver provided + * a resume hook when it registered the handler it will be called. + */ + +void fiq_glue_resume(void) +{ + if (!current_handler) + return; + fiq_glue_setup(current_handler->fiq, current_handler, + __get_cpu_var(fiq_stack) + THREAD_START_SP); + if (current_handler->resume) + current_handler->resume(current_handler); +} + diff --git a/arch/arm/include/asm/fiq_glue.h b/arch/arm/include/asm/fiq_glue.h new file mode 100644 index 000000000000..d54c29db97a8 --- /dev/null +++ b/arch/arm/include/asm/fiq_glue.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __ASM_FIQ_GLUE_H +#define __ASM_FIQ_GLUE_H + +struct fiq_glue_handler { + void (*fiq)(struct fiq_glue_handler *h, void *regs, void *svc_sp); + void (*resume)(struct fiq_glue_handler *h); +}; + +int fiq_glue_register_handler(struct fiq_glue_handler *handler); + +#ifdef CONFIG_FIQ_GLUE +void fiq_glue_resume(void); +#else +static inline void fiq_glue_resume(void) {} +#endif + +#endif -- GitLab From 8c0dbf608d25f87d80d9f26b5242155d4624eecc Mon Sep 17 00:00:00 2001 From: Iliyan Malchev Date: Sat, 5 Jun 2010 17:36:24 -0700 Subject: [PATCH 0866/1442] ANDROID: ARM: Add generic fiq serial debugger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Ibb536c88f0dbaf4766d0599296907e35e42cbfd6 Signed-off-by: Iliyan Malchev Signed-off-by: Arve Hjønnevåg --- arch/arm/common/Kconfig | 46 + arch/arm/common/Makefile | 1 + arch/arm/common/fiq_debugger.c | 1196 ++++++++++++++++++++++++ arch/arm/common/fiq_debugger_ringbuf.h | 94 ++ arch/arm/include/asm/fiq_debugger.h | 64 ++ 5 files changed, 1401 insertions(+) create mode 100644 arch/arm/common/fiq_debugger.c create mode 100644 arch/arm/common/fiq_debugger_ringbuf.h create mode 100644 arch/arm/include/asm/fiq_debugger.h diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index ce01364a96e3..992d4046bb8a 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -21,3 +21,49 @@ config SHARP_SCOOP config FIQ_GLUE bool select FIQ + +config FIQ_DEBUGGER + bool "FIQ Mode Serial Debugger" + select FIQ + select FIQ_GLUE + default n + help + The FIQ serial debugger can accept commands even when the + kernel is unresponsive due to being stuck with interrupts + disabled. + + +config FIQ_DEBUGGER_NO_SLEEP + bool "Keep serial debugger active" + depends on FIQ_DEBUGGER + default n + help + Enables the serial debugger at boot. Passing + fiq_debugger.no_sleep on the kernel commandline will + override this config option. + +config FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON + bool "Don't disable wakeup IRQ when debugger is active" + depends on FIQ_DEBUGGER + default n + help + Don't disable the wakeup irq when enabling the uart clock. This will + cause extra interrupts, but it makes the serial debugger usable with + on some MSM radio builds that ignore the uart clock request in power + collapse. + +config FIQ_DEBUGGER_CONSOLE + bool "Console on FIQ Serial Debugger port" + depends on FIQ_DEBUGGER + default n + help + Enables a console so that printk messages are displayed on + the debugger serial port as the occur. + +config FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE + bool "Put the FIQ debugger into console mode by default" + depends on FIQ_DEBUGGER_CONSOLE + default n + help + If enabled, this puts the fiq debugger into console mode by default. + Otherwise, the fiq debugger will start out in debug mode. diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 04aca896b338..707dcdf629d0 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -4,6 +4,7 @@ obj-y += firmware.o +obj-$(CONFIG_FIQ_DEBUGGER) += fiq_debugger.o obj-$(CONFIG_FIQ_GLUE) += fiq_glue.o fiq_glue_setup.o obj-$(CONFIG_ICST) += icst.o obj-$(CONFIG_SA1111) += sa1111.o diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c new file mode 100644 index 000000000000..3ed18ae2ed80 --- /dev/null +++ b/arch/arm/common/fiq_debugger.c @@ -0,0 +1,1196 @@ +/* + * arch/arm/common/fiq_debugger.c + * + * Serial Debugger Interface accessed through an FIQ interrupt. + * + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include "fiq_debugger_ringbuf.h" + +#define DEBUG_MAX 64 +#define MAX_UNHANDLED_FIQ_COUNT 1000000 + +#define THREAD_INFO(sp) ((struct thread_info *) \ + ((unsigned long)(sp) & ~(THREAD_SIZE - 1))) + +struct fiq_debugger_state { + struct fiq_glue_handler handler; + + int fiq; + int uart_irq; + int signal_irq; + int wakeup_irq; + bool wakeup_irq_no_set_wake; + struct clk *clk; + struct fiq_debugger_pdata *pdata; + struct platform_device *pdev; + + char debug_cmd[DEBUG_MAX]; + int debug_busy; + int debug_abort; + + char debug_buf[DEBUG_MAX]; + int debug_count; + + bool no_sleep; + bool debug_enable; + bool ignore_next_wakeup_irq; + struct timer_list sleep_timer; + spinlock_t sleep_timer_lock; + bool uart_enabled; + struct wake_lock debugger_wake_lock; + bool console_enable; + int current_cpu; + atomic_t unhandled_fiq_count; + bool in_fiq; + +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE + struct console console; + struct tty_driver *tty_driver; + struct tty_struct *tty; + int tty_open_count; + struct fiq_debugger_ringbuf *tty_rbuf; + bool syslog_dumping; +#endif + + unsigned int last_irqs[NR_IRQS]; + unsigned int last_local_timer_irqs[NR_CPUS]; +}; + +#ifdef CONFIG_FIQ_DEBUGGER_NO_SLEEP +static bool initial_no_sleep = true; +#else +static bool initial_no_sleep; +#endif + +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE +static bool initial_debug_enable = true; +static bool initial_console_enable = true; +#else +static bool initial_debug_enable; +static bool initial_console_enable; +#endif + +module_param_named(no_sleep, initial_no_sleep, bool, 0644); +module_param_named(debug_enable, initial_debug_enable, bool, 0644); +module_param_named(console_enable, initial_console_enable, bool, 0644); + +#ifdef CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON +static inline void enable_wakeup_irq(struct fiq_debugger_state *state) {} +static inline void disable_wakeup_irq(struct fiq_debugger_state *state) {} +#else +static inline void enable_wakeup_irq(struct fiq_debugger_state *state) +{ + if (state->wakeup_irq < 0) + return; + enable_irq(state->wakeup_irq); + if (!state->wakeup_irq_no_set_wake) + enable_irq_wake(state->wakeup_irq); +} +static inline void disable_wakeup_irq(struct fiq_debugger_state *state) +{ + if (state->wakeup_irq < 0) + return; + disable_irq_nosync(state->wakeup_irq); + if (!state->wakeup_irq_no_set_wake) + disable_irq_wake(state->wakeup_irq); +} +#endif + +static bool inline debug_have_fiq(struct fiq_debugger_state *state) +{ + return (state->fiq >= 0); +} + +static void debug_force_irq(struct fiq_debugger_state *state) +{ + unsigned int irq = state->signal_irq; + + if (WARN_ON(!debug_have_fiq(state))) + return; + if (state->pdata->force_irq) { + state->pdata->force_irq(state->pdev, irq); + } else { + struct irq_chip *chip = irq_get_chip(irq); + if (chip && chip->irq_retrigger) + chip->irq_retrigger(irq_get_irq_data(irq)); + } +} + +static void debug_uart_enable(struct fiq_debugger_state *state) +{ + if (state->clk) + clk_enable(state->clk); + if (state->pdata->uart_enable) + state->pdata->uart_enable(state->pdev); +} + +static void debug_uart_disable(struct fiq_debugger_state *state) +{ + if (state->pdata->uart_disable) + state->pdata->uart_disable(state->pdev); + if (state->clk) + clk_disable(state->clk); +} + +static void debug_uart_flush(struct fiq_debugger_state *state) +{ + if (state->pdata->uart_flush) + state->pdata->uart_flush(state->pdev); +} + +static void debug_puts(struct fiq_debugger_state *state, char *s) +{ + unsigned c; + while ((c = *s++)) { + if (c == '\n') + state->pdata->uart_putc(state->pdev, '\r'); + state->pdata->uart_putc(state->pdev, c); + } +} + +static void debug_prompt(struct fiq_debugger_state *state) +{ + debug_puts(state, "debug> "); +} + +int log_buf_copy(char *dest, int idx, int len); +static void dump_kernel_log(struct fiq_debugger_state *state) +{ + char buf[1024]; + int idx = 0; + int ret; + int saved_oip; + + /* setting oops_in_progress prevents log_buf_copy() + * from trying to take a spinlock which will make it + * very unhappy in some cases... + */ + saved_oip = oops_in_progress; + oops_in_progress = 1; + for (;;) { + ret = log_buf_copy(buf, idx, 1023); + if (ret <= 0) + break; + buf[ret] = 0; + debug_puts(state, buf); + idx += ret; + } + oops_in_progress = saved_oip; +} + +static char *mode_name(unsigned cpsr) +{ + switch (cpsr & MODE_MASK) { + case USR_MODE: return "USR"; + case FIQ_MODE: return "FIQ"; + case IRQ_MODE: return "IRQ"; + case SVC_MODE: return "SVC"; + case ABT_MODE: return "ABT"; + case UND_MODE: return "UND"; + case SYSTEM_MODE: return "SYS"; + default: return "???"; + } +} + +static int debug_printf(void *cookie, const char *fmt, ...) +{ + struct fiq_debugger_state *state = cookie; + char buf[256]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + debug_puts(state, buf); + return state->debug_abort; +} + +/* Safe outside fiq context */ +static int debug_printf_nfiq(void *cookie, const char *fmt, ...) +{ + struct fiq_debugger_state *state = cookie; + char buf[256]; + va_list ap; + unsigned long irq_flags; + + va_start(ap, fmt); + vsnprintf(buf, 128, fmt, ap); + va_end(ap); + + local_irq_save(irq_flags); + debug_puts(state, buf); + debug_uart_flush(state); + local_irq_restore(irq_flags); + return state->debug_abort; +} + +static void dump_regs(struct fiq_debugger_state *state, unsigned *regs) +{ + debug_printf(state, " r0 %08x r1 %08x r2 %08x r3 %08x\n", + regs[0], regs[1], regs[2], regs[3]); + debug_printf(state, " r4 %08x r5 %08x r6 %08x r7 %08x\n", + regs[4], regs[5], regs[6], regs[7]); + debug_printf(state, " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n", + regs[8], regs[9], regs[10], regs[11], + mode_name(regs[16])); + if ((regs[16] & MODE_MASK) == USR_MODE) + debug_printf(state, " ip %08x sp %08x lr %08x pc %08x " + "cpsr %08x\n", regs[12], regs[13], regs[14], + regs[15], regs[16]); + else + debug_printf(state, " ip %08x sp %08x lr %08x pc %08x " + "cpsr %08x spsr %08x\n", regs[12], regs[13], + regs[14], regs[15], regs[16], regs[17]); +} + +struct mode_regs { + unsigned long sp_svc; + unsigned long lr_svc; + unsigned long spsr_svc; + + unsigned long sp_abt; + unsigned long lr_abt; + unsigned long spsr_abt; + + unsigned long sp_und; + unsigned long lr_und; + unsigned long spsr_und; + + unsigned long sp_irq; + unsigned long lr_irq; + unsigned long spsr_irq; + + unsigned long r8_fiq; + unsigned long r9_fiq; + unsigned long r10_fiq; + unsigned long r11_fiq; + unsigned long r12_fiq; + unsigned long sp_fiq; + unsigned long lr_fiq; + unsigned long spsr_fiq; +}; + +void __naked get_mode_regs(struct mode_regs *regs) +{ + asm volatile ( + "mrs r1, cpsr\n" + "msr cpsr_c, #0xd3 @(SVC_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd7 @(ABT_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xdb @(UND_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd2 @(IRQ_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd1 @(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r8 - r14}\n" + "mrs r2, spsr\n" + "stmia r0!, {r2}\n" + "msr cpsr_c, r1\n" + "bx lr\n"); +} + + +static void dump_allregs(struct fiq_debugger_state *state, unsigned *regs) +{ + struct mode_regs mode_regs; + dump_regs(state, regs); + get_mode_regs(&mode_regs); + debug_printf(state, " svc: sp %08x lr %08x spsr %08x\n", + mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc); + debug_printf(state, " abt: sp %08x lr %08x spsr %08x\n", + mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt); + debug_printf(state, " und: sp %08x lr %08x spsr %08x\n", + mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und); + debug_printf(state, " irq: sp %08x lr %08x spsr %08x\n", + mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq); + debug_printf(state, " fiq: r8 %08x r9 %08x r10 %08x r11 %08x " + "r12 %08x\n", + mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq, + mode_regs.r11_fiq, mode_regs.r12_fiq); + debug_printf(state, " fiq: sp %08x lr %08x spsr %08x\n", + mode_regs.sp_fiq, mode_regs.lr_fiq, mode_regs.spsr_fiq); +} + +static void dump_irqs(struct fiq_debugger_state *state) +{ + int n; + unsigned int cpu; + + debug_printf(state, "irqnr total since-last status name\n"); + for (n = 0; n < NR_IRQS; n++) { + struct irqaction *act = irq_desc[n].action; + if (!act && !kstat_irqs(n)) + continue; + debug_printf(state, "%5d: %10u %11u %8x %s\n", n, + kstat_irqs(n), + kstat_irqs(n) - state->last_irqs[n], + irq_desc[n].status_use_accessors, + (act && act->name) ? act->name : "???"); + state->last_irqs[n] = kstat_irqs(n); + } + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + + debug_printf(state, "LOC %d: %10u %11u\n", cpu, + __IRQ_STAT(cpu, local_timer_irqs), + __IRQ_STAT(cpu, local_timer_irqs) - + state->last_local_timer_irqs[cpu]); + state->last_local_timer_irqs[cpu] = + __IRQ_STAT(cpu, local_timer_irqs); + } +} + +struct stacktrace_state { + struct fiq_debugger_state *state; + unsigned int depth; +}; + +static int report_trace(struct stackframe *frame, void *d) +{ + struct stacktrace_state *sts = d; + + if (sts->depth) { + debug_printf(sts->state, + " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", + frame->pc, frame->pc, frame->lr, frame->lr, + frame->sp, frame->fp); + sts->depth--; + return 0; + } + debug_printf(sts->state, " ...\n"); + + return sts->depth == 0; +} + +struct frame_tail { + struct frame_tail *fp; + unsigned long sp; + unsigned long lr; +} __attribute__((packed)); + +static struct frame_tail *user_backtrace(struct fiq_debugger_state *state, + struct frame_tail *tail) +{ + struct frame_tail buftail[2]; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) { + debug_printf(state, " invalid frame pointer %p\n", tail); + return NULL; + } + if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) { + debug_printf(state, + " failed to copy frame pointer %p\n", tail); + return NULL; + } + + debug_printf(state, " %p\n", buftail[0].lr); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (tail >= buftail[0].fp) + return NULL; + + return buftail[0].fp-1; +} + +void dump_stacktrace(struct fiq_debugger_state *state, + struct pt_regs * const regs, unsigned int depth, void *ssp) +{ + struct frame_tail *tail; + struct thread_info *real_thread_info = THREAD_INFO(ssp); + struct stacktrace_state sts; + + sts.depth = depth; + sts.state = state; + *current_thread_info() = *real_thread_info; + + if (!current) + debug_printf(state, "current NULL\n"); + else + debug_printf(state, "pid: %d comm: %s\n", + current->pid, current->comm); + dump_regs(state, (unsigned *)regs); + + if (!user_mode(regs)) { + struct stackframe frame; + frame.fp = regs->ARM_fp; + frame.sp = regs->ARM_sp; + frame.lr = regs->ARM_lr; + frame.pc = regs->ARM_pc; + debug_printf(state, + " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", + regs->ARM_pc, regs->ARM_pc, regs->ARM_lr, regs->ARM_lr, + regs->ARM_sp, regs->ARM_fp); + walk_stackframe(&frame, report_trace, &sts); + return; + } + + tail = ((struct frame_tail *) regs->ARM_fp) - 1; + while (depth-- && tail && !((unsigned long) tail & 3)) + tail = user_backtrace(state, tail); +} + +static void do_ps(struct fiq_debugger_state *state) +{ + struct task_struct *g; + struct task_struct *p; + unsigned task_state; + static const char stat_nam[] = "RSDTtZX"; + + debug_printf(state, "pid ppid prio task pc\n"); + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_state = p->state ? __ffs(p->state) + 1 : 0; + debug_printf(state, + "%5d %5d %4d ", p->pid, p->parent->pid, p->prio); + debug_printf(state, "%-13.13s %c", p->comm, + task_state >= sizeof(stat_nam) ? '?' : stat_nam[task_state]); + if (task_state == TASK_RUNNING) + debug_printf(state, " running\n"); + else + debug_printf(state, " %08lx\n", thread_saved_pc(p)); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); +} + +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE +static void begin_syslog_dump(struct fiq_debugger_state *state) +{ + state->syslog_dumping = true; +} + +static void end_syslog_dump(struct fiq_debugger_state *state) +{ + state->syslog_dumping = false; +} +#else +extern int do_syslog(int type, char __user *bug, int count); +static void begin_syslog_dump(struct fiq_debugger_state *state) +{ + do_syslog(5 /* clear */, NULL, 0); +} + +static void end_syslog_dump(struct fiq_debugger_state *state) +{ + char buf[128]; + int ret; + int idx = 0; + + while (1) { + ret = log_buf_copy(buf, idx, sizeof(buf) - 1); + if (ret <= 0) + break; + buf[ret] = 0; + debug_printf(state, "%s", buf); + idx += ret; + } +} +#endif + +static void do_sysrq(struct fiq_debugger_state *state, char rq) +{ + begin_syslog_dump(state); + handle_sysrq(rq); + end_syslog_dump(state); +} + +/* This function CANNOT be called in FIQ context */ +static void debug_irq_exec(struct fiq_debugger_state *state, char *cmd) +{ + if (!strcmp(cmd, "ps")) + do_ps(state); + if (!strcmp(cmd, "sysrq")) + do_sysrq(state, 'h'); + if (!strncmp(cmd, "sysrq ", 6)) + do_sysrq(state, cmd[6]); +} + +static void debug_help(struct fiq_debugger_state *state) +{ + debug_printf(state, "FIQ Debugger commands:\n" + " pc PC status\n" + " regs Register dump\n" + " allregs Extended Register dump\n" + " bt Stack trace\n" + " reboot Reboot\n" + " irqs Interupt status\n" + " kmsg Kernel log\n" + " version Kernel version\n"); + debug_printf(state, " sleep Allow sleep while in FIQ\n" + " nosleep Disable sleep while in FIQ\n" + " console Switch terminal to console\n" + " cpu Current CPU\n" + " cpu Switch to CPU\n"); + debug_printf(state, " ps Process list\n" + " sysrq sysrq options\n" + " sysrq Execute sysrq with \n"); +} + +static void take_affinity(void *info) +{ + struct fiq_debugger_state *state = info; + struct cpumask cpumask; + + cpumask_clear(&cpumask); + cpumask_set_cpu(get_cpu(), &cpumask); + + irq_set_affinity(state->uart_irq, &cpumask); +} + +static void switch_cpu(struct fiq_debugger_state *state, int cpu) +{ + if (!debug_have_fiq(state)) + smp_call_function_single(cpu, take_affinity, state, false); + state->current_cpu = cpu; +} + +static bool debug_fiq_exec(struct fiq_debugger_state *state, + const char *cmd, unsigned *regs, void *svc_sp) +{ + bool signal_helper = false; + + if (!strcmp(cmd, "help") || !strcmp(cmd, "?")) { + debug_help(state); + } else if (!strcmp(cmd, "pc")) { + debug_printf(state, " pc %08x cpsr %08x mode %s\n", + regs[15], regs[16], mode_name(regs[16])); + } else if (!strcmp(cmd, "regs")) { + dump_regs(state, regs); + } else if (!strcmp(cmd, "allregs")) { + dump_allregs(state, regs); + } else if (!strcmp(cmd, "bt")) { + dump_stacktrace(state, (struct pt_regs *)regs, 100, svc_sp); + } else if (!strcmp(cmd, "reboot")) { + arch_reset(0, 0); + } else if (!strcmp(cmd, "irqs")) { + dump_irqs(state); + } else if (!strcmp(cmd, "kmsg")) { + dump_kernel_log(state); + } else if (!strcmp(cmd, "version")) { + debug_printf(state, "%s\n", linux_banner); + } else if (!strcmp(cmd, "sleep")) { + state->no_sleep = false; + debug_printf(state, "enabling sleep\n"); + } else if (!strcmp(cmd, "nosleep")) { + state->no_sleep = true; + debug_printf(state, "disabling sleep\n"); + } else if (!strcmp(cmd, "console")) { + state->console_enable = true; + debug_printf(state, "console mode\n"); + } else if (!strcmp(cmd, "cpu")) { + debug_printf(state, "cpu %d\n", state->current_cpu); + } else if (!strncmp(cmd, "cpu ", 4)) { + unsigned long cpu = 0; + if (strict_strtoul(cmd + 4, 10, &cpu) == 0) + switch_cpu(state, cpu); + else + debug_printf(state, "invalid cpu\n"); + debug_printf(state, "cpu %d\n", state->current_cpu); + } else { + if (state->debug_busy) { + debug_printf(state, + "command processor busy. trying to abort.\n"); + state->debug_abort = -1; + } else { + strcpy(state->debug_cmd, cmd); + state->debug_busy = 1; + } + + return true; + } + if (!state->console_enable) + debug_prompt(state); + + return signal_helper; +} + +static void sleep_timer_expired(unsigned long data) +{ + struct fiq_debugger_state *state = (struct fiq_debugger_state *)data; + unsigned long flags; + + spin_lock_irqsave(&state->sleep_timer_lock, flags); + if (state->uart_enabled && !state->no_sleep) { + if (state->debug_enable && !state->console_enable) { + state->debug_enable = false; + debug_printf_nfiq(state, "suspending fiq debugger\n"); + } + state->ignore_next_wakeup_irq = true; + debug_uart_disable(state); + state->uart_enabled = false; + enable_wakeup_irq(state); + } + wake_unlock(&state->debugger_wake_lock); + spin_unlock_irqrestore(&state->sleep_timer_lock, flags); +} + +static void handle_wakeup(struct fiq_debugger_state *state) +{ + unsigned long flags; + + spin_lock_irqsave(&state->sleep_timer_lock, flags); + if (state->wakeup_irq >= 0 && state->ignore_next_wakeup_irq) { + state->ignore_next_wakeup_irq = false; + } else if (!state->uart_enabled) { + wake_lock(&state->debugger_wake_lock); + debug_uart_enable(state); + state->uart_enabled = true; + disable_wakeup_irq(state); + mod_timer(&state->sleep_timer, jiffies + HZ / 2); + } + spin_unlock_irqrestore(&state->sleep_timer_lock, flags); +} + +static irqreturn_t wakeup_irq_handler(int irq, void *dev) +{ + struct fiq_debugger_state *state = dev; + + if (!state->no_sleep) + debug_puts(state, "WAKEUP\n"); + handle_wakeup(state); + + return IRQ_HANDLED; +} + + +static void debug_handle_irq_context(struct fiq_debugger_state *state) +{ + if (!state->no_sleep) { + unsigned long flags; + + spin_lock_irqsave(&state->sleep_timer_lock, flags); + wake_lock(&state->debugger_wake_lock); + mod_timer(&state->sleep_timer, jiffies + HZ * 5); + spin_unlock_irqrestore(&state->sleep_timer_lock, flags); + } +#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) + if (state->tty) { + int i; + int count = fiq_debugger_ringbuf_level(state->tty_rbuf); + for (i = 0; i < count; i++) { + int c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0); + tty_insert_flip_char(state->tty, c, TTY_NORMAL); + if (!fiq_debugger_ringbuf_consume(state->tty_rbuf, 1)) + pr_warn("fiq tty failed to consume byte\n"); + } + tty_flip_buffer_push(state->tty); + } +#endif + if (state->debug_busy) { + debug_irq_exec(state, state->debug_cmd); + debug_prompt(state); + state->debug_busy = 0; + } +} + +static int debug_getc(struct fiq_debugger_state *state) +{ + return state->pdata->uart_getc(state->pdev); +} + +static bool debug_handle_uart_interrupt(struct fiq_debugger_state *state, + int this_cpu, void *regs, void *svc_sp) +{ + int c; + static int last_c; + int count = 0; + bool signal_helper = false; + + if (this_cpu != state->current_cpu) { + if (state->in_fiq) + return false; + + if (atomic_inc_return(&state->unhandled_fiq_count) != + MAX_UNHANDLED_FIQ_COUNT) + return false; + + debug_printf(state, "fiq_debugger: cpu %d not responding, " + "reverting to cpu %d\n", state->current_cpu, + this_cpu); + + atomic_set(&state->unhandled_fiq_count, 0); + switch_cpu(state, this_cpu); + return false; + } + + state->in_fiq = true; + + while ((c = debug_getc(state)) != FIQ_DEBUGGER_NO_CHAR) { + count++; + if (!state->debug_enable) { + if ((c == 13) || (c == 10)) { + state->debug_enable = true; + state->debug_count = 0; + debug_prompt(state); + } + } else if (c == FIQ_DEBUGGER_BREAK) { + state->console_enable = false; + debug_puts(state, "fiq debugger mode\n"); + state->debug_count = 0; + debug_prompt(state); +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE + } else if (state->console_enable && state->tty_rbuf) { + fiq_debugger_ringbuf_push(state->tty_rbuf, c); + signal_helper = true; +#endif + } else if ((c >= ' ') && (c < 127)) { + if (state->debug_count < (DEBUG_MAX - 1)) { + state->debug_buf[state->debug_count++] = c; + state->pdata->uart_putc(state->pdev, c); + } + } else if ((c == 8) || (c == 127)) { + if (state->debug_count > 0) { + state->debug_count--; + state->pdata->uart_putc(state->pdev, 8); + state->pdata->uart_putc(state->pdev, ' '); + state->pdata->uart_putc(state->pdev, 8); + } + } else if ((c == 13) || (c == 10)) { + if (c == '\r' || (c == '\n' && last_c != '\r')) { + state->pdata->uart_putc(state->pdev, '\r'); + state->pdata->uart_putc(state->pdev, '\n'); + } + if (state->debug_count) { + state->debug_buf[state->debug_count] = 0; + state->debug_count = 0; + signal_helper |= + debug_fiq_exec(state, state->debug_buf, + regs, svc_sp); + } else { + debug_prompt(state); + } + } + last_c = c; + } + debug_uart_flush(state); + if (state->pdata->fiq_ack) + state->pdata->fiq_ack(state->pdev, state->fiq); + + /* poke sleep timer if necessary */ + if (state->debug_enable && !state->no_sleep) + signal_helper = true; + + atomic_set(&state->unhandled_fiq_count, 0); + state->in_fiq = false; + + return signal_helper; +} + +static void debug_fiq(struct fiq_glue_handler *h, void *regs, void *svc_sp) +{ + struct fiq_debugger_state *state = + container_of(h, struct fiq_debugger_state, handler); + unsigned int this_cpu = THREAD_INFO(svc_sp)->cpu; + bool need_irq; + + need_irq = debug_handle_uart_interrupt(state, this_cpu, regs, svc_sp); + if (need_irq) + debug_force_irq(state); +} + +/* + * When not using FIQs, we only use this single interrupt as an entry point. + * This just effectively takes over the UART interrupt and does all the work + * in this context. + */ +static irqreturn_t debug_uart_irq(int irq, void *dev) +{ + struct fiq_debugger_state *state = dev; + bool not_done; + + handle_wakeup(state); + + /* handle the debugger irq in regular context */ + not_done = debug_handle_uart_interrupt(state, smp_processor_id(), + get_irq_regs(), + current_thread_info()); + if (not_done) + debug_handle_irq_context(state); + + return IRQ_HANDLED; +} + +/* + * If FIQs are used, not everything can happen in fiq context. + * FIQ handler does what it can and then signals this interrupt to finish the + * job in irq context. + */ +static irqreturn_t debug_signal_irq(int irq, void *dev) +{ + struct fiq_debugger_state *state = dev; + + if (state->pdata->force_irq_ack) + state->pdata->force_irq_ack(state->pdev, state->signal_irq); + + debug_handle_irq_context(state); + + return IRQ_HANDLED; +} + +static void debug_resume(struct fiq_glue_handler *h) +{ + struct fiq_debugger_state *state = + container_of(h, struct fiq_debugger_state, handler); + if (state->pdata->uart_resume) + state->pdata->uart_resume(state->pdev); +} + +#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) +struct tty_driver *debug_console_device(struct console *co, int *index) +{ + struct fiq_debugger_state *state; + state = container_of(co, struct fiq_debugger_state, console); + *index = 0; + return state->tty_driver; +} + +static void debug_console_write(struct console *co, + const char *s, unsigned int count) +{ + struct fiq_debugger_state *state; + + state = container_of(co, struct fiq_debugger_state, console); + + if (!state->console_enable && !state->syslog_dumping) + return; + + debug_uart_enable(state); + while (count--) { + if (*s == '\n') + state->pdata->uart_putc(state->pdev, '\r'); + state->pdata->uart_putc(state->pdev, *s++); + } + debug_uart_flush(state); + debug_uart_disable(state); +} + +static struct console fiq_debugger_console = { + .name = "ttyFIQ", + .device = debug_console_device, + .write = debug_console_write, + .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_ENABLED, +}; + +int fiq_tty_open(struct tty_struct *tty, struct file *filp) +{ + struct fiq_debugger_state *state = tty->driver->driver_state; + if (state->tty_open_count++) + return 0; + + tty->driver_data = state; + state->tty = tty; + return 0; +} + +void fiq_tty_close(struct tty_struct *tty, struct file *filp) +{ + struct fiq_debugger_state *state = tty->driver_data; + if (--state->tty_open_count) + return; + state->tty = NULL; +} + +int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) +{ + int i; + struct fiq_debugger_state *state = tty->driver_data; + + if (!state->console_enable) + return count; + + debug_uart_enable(state); + for (i = 0; i < count; i++) + state->pdata->uart_putc(state->pdev, *buf++); + debug_uart_disable(state); + + return count; +} + +int fiq_tty_write_room(struct tty_struct *tty) +{ + return 1024; +} + +static const struct tty_operations fiq_tty_driver_ops = { + .write = fiq_tty_write, + .write_room = fiq_tty_write_room, + .open = fiq_tty_open, + .close = fiq_tty_close, +}; + +static int fiq_debugger_tty_init(struct fiq_debugger_state *state) +{ + int ret = -EINVAL; + + state->tty_driver = alloc_tty_driver(1); + if (!state->tty_driver) { + pr_err("Failed to allocate fiq debugger tty\n"); + return -ENOMEM; + } + + state->tty_driver->owner = THIS_MODULE; + state->tty_driver->driver_name = "fiq-debugger"; + state->tty_driver->name = "ttyFIQ"; + state->tty_driver->type = TTY_DRIVER_TYPE_SERIAL; + state->tty_driver->subtype = SERIAL_TYPE_NORMAL; + state->tty_driver->init_termios = tty_std_termios; + state->tty_driver->init_termios.c_cflag = + B115200 | CS8 | CREAD | HUPCL | CLOCAL; + state->tty_driver->init_termios.c_ispeed = + state->tty_driver->init_termios.c_ospeed = 115200; + state->tty_driver->flags = TTY_DRIVER_REAL_RAW; + tty_set_operations(state->tty_driver, &fiq_tty_driver_ops); + state->tty_driver->driver_state = state; + + ret = tty_register_driver(state->tty_driver); + if (ret) { + pr_err("Failed to register fiq tty: %d\n", ret); + goto err; + } + + state->tty_rbuf = fiq_debugger_ringbuf_alloc(1024); + if (!state->tty_rbuf) { + pr_err("Failed to allocate fiq debugger ringbuf\n"); + ret = -ENOMEM; + goto err; + } + + pr_info("Registered FIQ tty driver %p\n", state->tty_driver); + return 0; + +err: + fiq_debugger_ringbuf_free(state->tty_rbuf); + state->tty_rbuf = NULL; + put_tty_driver(state->tty_driver); + return ret; +} +#endif + +static int fiq_debugger_dev_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct fiq_debugger_state *state = platform_get_drvdata(pdev); + + if (state->pdata->uart_dev_suspend) + return state->pdata->uart_dev_suspend(pdev); + return 0; +} + +static int fiq_debugger_dev_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct fiq_debugger_state *state = platform_get_drvdata(pdev); + + if (state->pdata->uart_dev_resume) + return state->pdata->uart_dev_resume(pdev); + return 0; +} + +static int fiq_debugger_probe(struct platform_device *pdev) +{ + int ret; + struct fiq_debugger_pdata *pdata = dev_get_platdata(&pdev->dev); + struct fiq_debugger_state *state; + int fiq; + int uart_irq; + + if (!pdata->uart_getc || !pdata->uart_putc) + return -EINVAL; + if ((pdata->uart_enable && !pdata->uart_disable) || + (!pdata->uart_enable && pdata->uart_disable)) + return -EINVAL; + + fiq = platform_get_irq_byname(pdev, "fiq"); + uart_irq = platform_get_irq_byname(pdev, "uart_irq"); + + /* uart_irq mode and fiq mode are mutually exclusive, but one of them + * is required */ + if ((uart_irq < 0 && fiq < 0) || (uart_irq >= 0 && fiq >= 0)) + return -EINVAL; + if (fiq >= 0 && !pdata->fiq_enable) + return -EINVAL; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + setup_timer(&state->sleep_timer, sleep_timer_expired, + (unsigned long)state); + state->pdata = pdata; + state->pdev = pdev; + state->no_sleep = initial_no_sleep; + state->debug_enable = initial_debug_enable; + state->console_enable = initial_console_enable; + + state->fiq = fiq; + state->uart_irq = uart_irq; + state->signal_irq = platform_get_irq_byname(pdev, "signal"); + state->wakeup_irq = platform_get_irq_byname(pdev, "wakeup"); + + platform_set_drvdata(pdev, state); + + spin_lock_init(&state->sleep_timer_lock); + + if (state->wakeup_irq < 0 && debug_have_fiq(state)) + state->no_sleep = true; + state->ignore_next_wakeup_irq = !state->no_sleep; + + wake_lock_init(&state->debugger_wake_lock, + WAKE_LOCK_SUSPEND, "serial-debug"); + + state->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(state->clk)) + state->clk = NULL; + + /* do not call pdata->uart_enable here since uart_init may still + * need to do some initialization before uart_enable can work. + * So, only try to manage the clock during init. + */ + if (state->clk) + clk_enable(state->clk); + + if (pdata->uart_init) { + ret = pdata->uart_init(pdev); + if (ret) + goto err_uart_init; + } + + debug_printf_nfiq(state, "\n", + state->no_sleep ? "" : "twice "); + + if (debug_have_fiq(state)) { + state->handler.fiq = debug_fiq; + state->handler.resume = debug_resume; + ret = fiq_glue_register_handler(&state->handler); + if (ret) { + pr_err("%s: could not install fiq handler\n", __func__); + goto err_register_fiq; + } + + pdata->fiq_enable(pdev, state->fiq, 1); + } else { + ret = request_irq(state->uart_irq, debug_uart_irq, + IRQF_NO_SUSPEND, "debug", state); + if (ret) { + pr_err("%s: could not install irq handler\n", __func__); + goto err_register_irq; + } + + /* for irq-only mode, we want this irq to wake us up, if it + * can. + */ + enable_irq_wake(state->uart_irq); + } + + if (state->clk) + clk_disable(state->clk); + + if (state->signal_irq >= 0) { + ret = request_irq(state->signal_irq, debug_signal_irq, + IRQF_TRIGGER_RISING, "debug-signal", state); + if (ret) + pr_err("serial_debugger: could not install signal_irq"); + } + + if (state->wakeup_irq >= 0) { + ret = request_irq(state->wakeup_irq, wakeup_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_DISABLED, + "debug-wakeup", state); + if (ret) { + pr_err("serial_debugger: " + "could not install wakeup irq\n"); + state->wakeup_irq = -1; + } else { + ret = enable_irq_wake(state->wakeup_irq); + if (ret) { + pr_err("serial_debugger: " + "could not enable wakeup\n"); + state->wakeup_irq_no_set_wake = true; + } + } + } + if (state->no_sleep) + handle_wakeup(state); + +#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) + state->console = fiq_debugger_console; + register_console(&state->console); + fiq_debugger_tty_init(state); +#endif + return 0; + +err_register_irq: +err_register_fiq: + if (pdata->uart_free) + pdata->uart_free(pdev); +err_uart_init: + if (state->clk) + clk_disable(state->clk); + if (state->clk) + clk_put(state->clk); + wake_lock_destroy(&state->debugger_wake_lock); + platform_set_drvdata(pdev, NULL); + kfree(state); + return ret; +} + +static const struct dev_pm_ops fiq_debugger_dev_pm_ops = { + .suspend = fiq_debugger_dev_suspend, + .resume = fiq_debugger_dev_resume, +}; + +static struct platform_driver fiq_debugger_driver = { + .probe = fiq_debugger_probe, + .driver = { + .name = "fiq_debugger", + .pm = &fiq_debugger_dev_pm_ops, + }, +}; + +static int __init fiq_debugger_init(void) +{ + return platform_driver_register(&fiq_debugger_driver); +} + +postcore_initcall(fiq_debugger_init); diff --git a/arch/arm/common/fiq_debugger_ringbuf.h b/arch/arm/common/fiq_debugger_ringbuf.h new file mode 100644 index 000000000000..2649b5581088 --- /dev/null +++ b/arch/arm/common/fiq_debugger_ringbuf.h @@ -0,0 +1,94 @@ +/* + * arch/arm/common/fiq_debugger_ringbuf.c + * + * simple lockless ringbuffer + * + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +struct fiq_debugger_ringbuf { + int len; + int head; + int tail; + u8 buf[]; +}; + + +static inline struct fiq_debugger_ringbuf *fiq_debugger_ringbuf_alloc(int len) +{ + struct fiq_debugger_ringbuf *rbuf; + + rbuf = kzalloc(sizeof(*rbuf) + len, GFP_KERNEL); + if (rbuf == NULL) + return NULL; + + rbuf->len = len; + rbuf->head = 0; + rbuf->tail = 0; + smp_mb(); + + return rbuf; +} + +static inline void fiq_debugger_ringbuf_free(struct fiq_debugger_ringbuf *rbuf) +{ + kfree(rbuf); +} + +static inline int fiq_debugger_ringbuf_level(struct fiq_debugger_ringbuf *rbuf) +{ + int level = rbuf->head - rbuf->tail; + + if (level < 0) + level = rbuf->len + level; + + return level; +} + +static inline int fiq_debugger_ringbuf_room(struct fiq_debugger_ringbuf *rbuf) +{ + return rbuf->len - fiq_debugger_ringbuf_level(rbuf) - 1; +} + +static inline u8 +fiq_debugger_ringbuf_peek(struct fiq_debugger_ringbuf *rbuf, int i) +{ + return rbuf->buf[(rbuf->tail + i) % rbuf->len]; +} + +static inline int +fiq_debugger_ringbuf_consume(struct fiq_debugger_ringbuf *rbuf, int count) +{ + count = min(count, fiq_debugger_ringbuf_level(rbuf)); + + rbuf->tail = (rbuf->tail + count) % rbuf->len; + smp_mb(); + + return count; +} + +static inline int +fiq_debugger_ringbuf_push(struct fiq_debugger_ringbuf *rbuf, u8 datum) +{ + if (fiq_debugger_ringbuf_room(rbuf) == 0) + return 0; + + rbuf->buf[rbuf->head] = datum; + smp_mb(); + rbuf->head = (rbuf->head + 1) % rbuf->len; + smp_mb(); + + return 1; +} diff --git a/arch/arm/include/asm/fiq_debugger.h b/arch/arm/include/asm/fiq_debugger.h new file mode 100644 index 000000000000..4d274883ba6a --- /dev/null +++ b/arch/arm/include/asm/fiq_debugger.h @@ -0,0 +1,64 @@ +/* + * arch/arm/include/asm/fiq_debugger.h + * + * Copyright (C) 2010 Google, Inc. + * Author: Colin Cross + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _ARCH_ARM_MACH_TEGRA_FIQ_DEBUGGER_H_ +#define _ARCH_ARM_MACH_TEGRA_FIQ_DEBUGGER_H_ + +#include + +#define FIQ_DEBUGGER_NO_CHAR NO_POLL_CHAR +#define FIQ_DEBUGGER_BREAK 0x00ff0100 + +#define FIQ_DEBUGGER_FIQ_IRQ_NAME "fiq" +#define FIQ_DEBUGGER_SIGNAL_IRQ_NAME "signal" +#define FIQ_DEBUGGER_WAKEUP_IRQ_NAME "wakeup" + +/** + * struct fiq_debugger_pdata - fiq debugger platform data + * @uart_resume: used to restore uart state right before enabling + * the fiq. + * @uart_enable: Do the work necessary to communicate with the uart + * hw (enable clocks, etc.). This must be ref-counted. + * @uart_disable: Do the work necessary to disable the uart hw + * (disable clocks, etc.). This must be ref-counted. + * @uart_dev_suspend: called during PM suspend, generally not needed + * for real fiq mode debugger. + * @uart_dev_resume: called during PM resume, generally not needed + * for real fiq mode debugger. + */ +struct fiq_debugger_pdata { + int (*uart_init)(struct platform_device *pdev); + void (*uart_free)(struct platform_device *pdev); + int (*uart_resume)(struct platform_device *pdev); + int (*uart_getc)(struct platform_device *pdev); + void (*uart_putc)(struct platform_device *pdev, unsigned int c); + void (*uart_flush)(struct platform_device *pdev); + void (*uart_enable)(struct platform_device *pdev); + void (*uart_disable)(struct platform_device *pdev); + + int (*uart_dev_suspend)(struct platform_device *pdev); + int (*uart_dev_resume)(struct platform_device *pdev); + + void (*fiq_enable)(struct platform_device *pdev, unsigned int fiq, + bool enable); + void (*fiq_ack)(struct platform_device *pdev, unsigned int fiq); + + void (*force_irq)(struct platform_device *pdev, unsigned int irq); + void (*force_irq_ack)(struct platform_device *pdev, unsigned int irq); +}; + +#endif -- GitLab From 69279c68eed43b11b89d0fe8c13e2cbf2369b42b Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 14 Mar 2012 16:28:45 -0700 Subject: [PATCH 0867/1442] ANDROID: ARM: fiq_debugger: fix compiling for v3.3 Call kernel_restart instead of arch_reset, the ARM reset handling has changed. Remove localtimer irq printing, they now show up in the regular irq stats. Change-Id: I523da343b292c5711f3e1cbfd766d32eea2da84e Signed-off-by: Colin Cross --- arch/arm/common/fiq_debugger.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 3ed18ae2ed80..0e33748edd60 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -37,8 +38,6 @@ #include #include -#include - #include #include "fiq_debugger_ringbuf.h" @@ -357,7 +356,6 @@ static void dump_allregs(struct fiq_debugger_state *state, unsigned *regs) static void dump_irqs(struct fiq_debugger_state *state) { int n; - unsigned int cpu; debug_printf(state, "irqnr total since-last status name\n"); for (n = 0; n < NR_IRQS; n++) { @@ -371,16 +369,6 @@ static void dump_irqs(struct fiq_debugger_state *state) (act && act->name) ? act->name : "???"); state->last_irqs[n] = kstat_irqs(n); } - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - - debug_printf(state, "LOC %d: %10u %11u\n", cpu, - __IRQ_STAT(cpu, local_timer_irqs), - __IRQ_STAT(cpu, local_timer_irqs) - - state->last_local_timer_irqs[cpu]); - state->last_local_timer_irqs[cpu] = - __IRQ_STAT(cpu, local_timer_irqs); - } } struct stacktrace_state { @@ -605,7 +593,7 @@ static bool debug_fiq_exec(struct fiq_debugger_state *state, } else if (!strcmp(cmd, "bt")) { dump_stacktrace(state, (struct pt_regs *)regs, 100, svc_sp); } else if (!strcmp(cmd, "reboot")) { - arch_reset(0, 0); + kernel_restart(NULL); } else if (!strcmp(cmd, "irqs")) { dump_irqs(state); } else if (!strcmp(cmd, "kmsg")) { -- GitLab From ae9d69ae99521cc8dbf8c0fccb946d7ab78d7628 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 14 Mar 2012 16:29:47 -0700 Subject: [PATCH 0868/1442] ANDROID: ARM: fiq_debugger: add support for reboot commands Pass the rest of the reboot command to kernel_restart to allow reboot bootloader to work from FIQ debugger. Change-Id: I4e7b366a69268dda17ffcf4c84f2373d15cb1271 Signed-off-by: Colin Cross --- arch/arm/common/fiq_debugger.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 0e33748edd60..3f75495fab02 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -592,8 +592,17 @@ static bool debug_fiq_exec(struct fiq_debugger_state *state, dump_allregs(state, regs); } else if (!strcmp(cmd, "bt")) { dump_stacktrace(state, (struct pt_regs *)regs, 100, svc_sp); - } else if (!strcmp(cmd, "reboot")) { - kernel_restart(NULL); + } else if (!strncmp(cmd, "reboot", 6)) { + cmd += 6; + while (*cmd == ' ') + cmd++; + if (*cmd) { + char tmp_cmd[32]; + strlcpy(tmp_cmd, cmd, sizeof(tmp_cmd)); + kernel_restart(tmp_cmd); + } else { + kernel_restart(NULL); + } } else if (!strcmp(cmd, "irqs")) { dump_irqs(state); } else if (!strcmp(cmd, "kmsg")) { -- GitLab From 31c550f5448fe489a22c9a5f6251d0883b17d90f Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 15 Mar 2012 12:57:20 -0700 Subject: [PATCH 0869/1442] ANDROID: ARM: fiq_debugger: add debug_putc Convert all the calls to state->pdata->uart_putc to a debug_putc helper. Change-Id: Idc007bd170ff1b51d0325e238105ae0c86d23777 Signed-off-by: Colin Cross --- arch/arm/common/fiq_debugger.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 3f75495fab02..909ef56596e8 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -174,13 +174,18 @@ static void debug_uart_flush(struct fiq_debugger_state *state) state->pdata->uart_flush(state->pdev); } +static void debug_putc(struct fiq_debugger_state *state, char c) +{ + state->pdata->uart_putc(state->pdev, c); +} + static void debug_puts(struct fiq_debugger_state *state, char *s) { unsigned c; while ((c = *s++)) { if (c == '\n') - state->pdata->uart_putc(state->pdev, '\r'); - state->pdata->uart_putc(state->pdev, c); + debug_putc(state, '\r'); + debug_putc(state, c); } } @@ -777,19 +782,19 @@ static bool debug_handle_uart_interrupt(struct fiq_debugger_state *state, } else if ((c >= ' ') && (c < 127)) { if (state->debug_count < (DEBUG_MAX - 1)) { state->debug_buf[state->debug_count++] = c; - state->pdata->uart_putc(state->pdev, c); + debug_putc(state, c); } } else if ((c == 8) || (c == 127)) { if (state->debug_count > 0) { state->debug_count--; - state->pdata->uart_putc(state->pdev, 8); - state->pdata->uart_putc(state->pdev, ' '); - state->pdata->uart_putc(state->pdev, 8); + debug_putc(state, 8); + debug_putc(state, ' '); + debug_putc(state, 8); } } else if ((c == 13) || (c == 10)) { if (c == '\r' || (c == '\n' && last_c != '\r')) { - state->pdata->uart_putc(state->pdev, '\r'); - state->pdata->uart_putc(state->pdev, '\n'); + debug_putc(state, '\r'); + debug_putc(state, '\n'); } if (state->debug_count) { state->debug_buf[state->debug_count] = 0; @@ -898,8 +903,8 @@ static void debug_console_write(struct console *co, debug_uart_enable(state); while (count--) { if (*s == '\n') - state->pdata->uart_putc(state->pdev, '\r'); - state->pdata->uart_putc(state->pdev, *s++); + debug_putc(state, '\r'); + debug_putc(state, *s++); } debug_uart_flush(state); debug_uart_disable(state); @@ -941,7 +946,7 @@ int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) debug_uart_enable(state); for (i = 0; i < count; i++) - state->pdata->uart_putc(state->pdev, *buf++); + debug_putc(state, *buf++); debug_uart_disable(state); return count; -- GitLab From 3bfc29b0ba1608e823ac0fd76e6b82105448d4a4 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 14 Mar 2012 19:23:29 -0700 Subject: [PATCH 0870/1442] ANDROID: ARM: fiq_debugger: add support for kgdb Adds polling tty ops to the fiq debugger console tty, which allows kgdb to run against an fiq debugger console. Add a check in do_sysrq to prevent enabling kgdb from the fiq debugger unless a flag (writable only by root) has been set. This should make it safe to enable KGDB on a production device. Also add a shortcut to enable the console and kgdb together, to allow kgdb to be enabled when the shell on the console is not responding. Change-Id: Ifc65239ca96c9887431a6a36b9b44a539002f544 Signed-off-by: Colin Cross --- arch/arm/common/fiq_debugger.c | 73 +++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 909ef56596e8..1f64d7dc83b4 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -106,9 +106,12 @@ static bool initial_debug_enable; static bool initial_console_enable; #endif +static bool fiq_kgdb_enable; + module_param_named(no_sleep, initial_no_sleep, bool, 0644); module_param_named(debug_enable, initial_debug_enable, bool, 0644); module_param_named(console_enable, initial_console_enable, bool, 0644); +module_param_named(kgdb_enable, fiq_kgdb_enable, bool, 0644); #ifdef CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON static inline void enable_wakeup_irq(struct fiq_debugger_state *state) {} @@ -526,11 +529,29 @@ static void end_syslog_dump(struct fiq_debugger_state *state) static void do_sysrq(struct fiq_debugger_state *state, char rq) { + if ((rq == 'g' || rq == 'G') && !fiq_kgdb_enable) { + debug_printf(state, "sysrq-g blocked\n"); + return; + } begin_syslog_dump(state); handle_sysrq(rq); end_syslog_dump(state); } +#ifdef CONFIG_KGDB +static void do_kgdb(struct fiq_debugger_state *state) +{ + if (!fiq_kgdb_enable) { + debug_printf(state, "kgdb through fiq debugger not enabled\n"); + return; + } + + debug_printf(state, "enabling console and triggering kgdb\n"); + state->console_enable = true; + handle_sysrq('g'); +} +#endif + /* This function CANNOT be called in FIQ context */ static void debug_irq_exec(struct fiq_debugger_state *state, char *cmd) { @@ -540,6 +561,10 @@ static void debug_irq_exec(struct fiq_debugger_state *state, char *cmd) do_sysrq(state, 'h'); if (!strncmp(cmd, "sysrq ", 6)) do_sysrq(state, cmd[6]); +#ifdef CONFIG_KGDB + if (!strcmp(cmd, "kgdb")) + do_kgdb(state); +#endif } static void debug_help(struct fiq_debugger_state *state) @@ -561,6 +586,9 @@ static void debug_help(struct fiq_debugger_state *state) debug_printf(state, " ps Process list\n" " sysrq sysrq options\n" " sysrq Execute sysrq with \n"); +#ifdef CONFIG_KGDB + debug_printf(state, " kgdb Enter kernel debugger\n"); +#endif } static void take_affinity(void *info) @@ -724,7 +752,8 @@ static void debug_handle_irq_context(struct fiq_debugger_state *state) #endif if (state->debug_busy) { debug_irq_exec(state, state->debug_cmd); - debug_prompt(state); + if (!state->console_enable) + debug_prompt(state); state->debug_busy = 0; } } @@ -957,11 +986,53 @@ int fiq_tty_write_room(struct tty_struct *tty) return 1024; } +#ifdef CONFIG_CONSOLE_POLL +static int fiq_tty_poll_init(struct tty_driver *driver, int line, char *options) +{ + return 0; +} + +static int fiq_tty_poll_get_char(struct tty_driver *driver, int line) +{ + struct fiq_debugger_state *state = driver->ttys[line]->driver_data; + int c = NO_POLL_CHAR; + + debug_uart_enable(state); + if (debug_have_fiq(state)) { + int count = fiq_debugger_ringbuf_level(state->tty_rbuf); + if (count > 0) { + c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0); + fiq_debugger_ringbuf_consume(state->tty_rbuf, 1); + } + } else { + c = debug_getc(state); + if (c == FIQ_DEBUGGER_NO_CHAR) + c = NO_POLL_CHAR; + } + debug_uart_disable(state); + + return c; +} + +static void fiq_tty_poll_put_char(struct tty_driver *driver, int line, char ch) +{ + struct fiq_debugger_state *state = driver->ttys[line]->driver_data; + debug_uart_enable(state); + debug_putc(state, ch); + debug_uart_disable(state); +} +#endif + static const struct tty_operations fiq_tty_driver_ops = { .write = fiq_tty_write, .write_room = fiq_tty_write_room, .open = fiq_tty_open, .close = fiq_tty_close, +#ifdef CONFIG_CONSOLE_POLL + .poll_init = fiq_tty_poll_init, + .poll_get_char = fiq_tty_poll_get_char, + .poll_put_char = fiq_tty_poll_put_char, +#endif }; static int fiq_debugger_tty_init(struct fiq_debugger_state *state) -- GitLab From 085886c946f1a62a1fc54311f9738a5ff87d7741 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 14 Mar 2012 19:26:53 -0700 Subject: [PATCH 0871/1442] ANDROID: kdb: support new lines without carriage returns kdb expects carriage returns through the serial port to terminate commands. Modify it to accept the first seen carriage return or new line as a terminator, but not treat \r\n as two terminators. Change-Id: I06166017e7703d24310eefcb71c3a7d427088db7 Signed-off-by: Colin Cross --- kernel/debug/kdb/kdb_io.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index fc1ef736253c..0b891286a150 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize) int i; int diag, dtab_count; int key; - + static int last_crlf; diag = kdbgetintenv("DTABCOUNT", &dtab_count); if (diag) @@ -237,6 +237,9 @@ static char *kdb_read(char *buffer, size_t bufsize) return buffer; if (key != 9) tab = 0; + if (key != 10 && key != 13) + last_crlf = 0; + switch (key) { case 8: /* backspace */ if (cp > buffer) { @@ -254,7 +257,12 @@ static char *kdb_read(char *buffer, size_t bufsize) *cp = tmp; } break; - case 13: /* enter */ + case 10: /* new line */ + case 13: /* carriage return */ + /* handle \n after \r */ + if (last_crlf && last_crlf != key) + break; + last_crlf = key; *lastchar++ = '\n'; *lastchar++ = '\0'; if (!KDB_STATE(KGDB_TRANS)) { -- GitLab From 77fcefe0044226e4eb507e8027d4f19a3b7c2e12 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Sun, 18 Mar 2012 15:25:55 -0700 Subject: [PATCH 0872/1442] ANDROID: ARM: fiq_debugger: fix multiple consoles and make it a preferred console Fix setting up consoles on multiple fiq debugger devices by splitting the tty driver init into the initcall, and initializing the single tty device during probe. Has the side effect of moving the tty device node to /dev/ttyFIQx, where x is the platform device id, which should normally match the serial port. To avoid having to pass a different console=/dev/ttyFIQx for every device, make the fiq debugger a preferred console that will be used by default if no console was passed on the command line. Change-Id: I6cc2670628a41e84615859bc96adba189966d647 Signed-off-by: Colin Cross --- arch/arm/common/fiq_debugger.c | 111 ++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 28 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 1f64d7dc83b4..ac952241fdd2 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -45,6 +45,8 @@ #define DEBUG_MAX 64 #define MAX_UNHANDLED_FIQ_COUNT 1000000 +#define MAX_FIQ_DEBUGGER_PORTS 4 + #define THREAD_INFO(sp) ((struct thread_info *) \ ((unsigned long)(sp) & ~(THREAD_SIZE - 1))) @@ -81,7 +83,6 @@ struct fiq_debugger_state { #ifdef CONFIG_FIQ_DEBUGGER_CONSOLE struct console console; - struct tty_driver *tty_driver; struct tty_struct *tty; int tty_open_count; struct fiq_debugger_ringbuf *tty_rbuf; @@ -92,6 +93,10 @@ struct fiq_debugger_state { unsigned int last_local_timer_irqs[NR_CPUS]; }; +#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE +struct tty_driver *fiq_tty_driver; +#endif + #ifdef CONFIG_FIQ_DEBUGGER_NO_SLEEP static bool initial_no_sleep = true; #else @@ -913,10 +918,8 @@ static void debug_resume(struct fiq_glue_handler *h) #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) struct tty_driver *debug_console_device(struct console *co, int *index) { - struct fiq_debugger_state *state; - state = container_of(co, struct fiq_debugger_state, console); - *index = 0; - return state->tty_driver; + *index = co->index; + return fiq_tty_driver; } static void debug_console_write(struct console *co, @@ -948,7 +951,9 @@ static struct console fiq_debugger_console = { int fiq_tty_open(struct tty_struct *tty, struct file *filp) { - struct fiq_debugger_state *state = tty->driver->driver_state; + int line = tty->index; + struct fiq_debugger_state **states = tty->driver->driver_state; + struct fiq_debugger_state *state = states[line]; if (state->tty_open_count++) return 0; @@ -1035,36 +1040,66 @@ static const struct tty_operations fiq_tty_driver_ops = { #endif }; -static int fiq_debugger_tty_init(struct fiq_debugger_state *state) +static int fiq_debugger_tty_init(void) { - int ret = -EINVAL; + int ret; + struct fiq_debugger_state **states = NULL; - state->tty_driver = alloc_tty_driver(1); - if (!state->tty_driver) { - pr_err("Failed to allocate fiq debugger tty\n"); + states = kzalloc(sizeof(*states) * MAX_FIQ_DEBUGGER_PORTS, GFP_KERNEL); + if (!states) { + pr_err("Failed to allocate fiq debugger state structres\n"); return -ENOMEM; } - state->tty_driver->owner = THIS_MODULE; - state->tty_driver->driver_name = "fiq-debugger"; - state->tty_driver->name = "ttyFIQ"; - state->tty_driver->type = TTY_DRIVER_TYPE_SERIAL; - state->tty_driver->subtype = SERIAL_TYPE_NORMAL; - state->tty_driver->init_termios = tty_std_termios; - state->tty_driver->init_termios.c_cflag = + fiq_tty_driver = alloc_tty_driver(MAX_FIQ_DEBUGGER_PORTS); + if (!fiq_tty_driver) { + pr_err("Failed to allocate fiq debugger tty\n"); + ret = -ENOMEM; + goto err_free_state; + } + + fiq_tty_driver->owner = THIS_MODULE; + fiq_tty_driver->driver_name = "fiq-debugger"; + fiq_tty_driver->name = "ttyFIQ"; + fiq_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; + fiq_tty_driver->subtype = SERIAL_TYPE_NORMAL; + fiq_tty_driver->init_termios = tty_std_termios; + fiq_tty_driver->flags = TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV; + fiq_tty_driver->driver_state = states; + + fiq_tty_driver->init_termios.c_cflag = B115200 | CS8 | CREAD | HUPCL | CLOCAL; - state->tty_driver->init_termios.c_ispeed = - state->tty_driver->init_termios.c_ospeed = 115200; - state->tty_driver->flags = TTY_DRIVER_REAL_RAW; - tty_set_operations(state->tty_driver, &fiq_tty_driver_ops); - state->tty_driver->driver_state = state; + fiq_tty_driver->init_termios.c_ispeed = 115200; + fiq_tty_driver->init_termios.c_ospeed = 115200; - ret = tty_register_driver(state->tty_driver); + tty_set_operations(fiq_tty_driver, &fiq_tty_driver_ops); + + ret = tty_register_driver(fiq_tty_driver); if (ret) { pr_err("Failed to register fiq tty: %d\n", ret); - goto err; + goto err_free_tty; } + pr_info("Registered FIQ tty driver\n"); + return 0; + +err_free_tty: + put_tty_driver(fiq_tty_driver); + fiq_tty_driver = NULL; +err_free_state: + kfree(states); + return ret; +} + +static int fiq_debugger_tty_init_one(struct fiq_debugger_state *state) +{ + int ret; + struct device *tty_dev; + struct fiq_debugger_state **states = fiq_tty_driver->driver_state; + + states[state->pdev->id] = state; + state->tty_rbuf = fiq_debugger_ringbuf_alloc(1024); if (!state->tty_rbuf) { pr_err("Failed to allocate fiq debugger ringbuf\n"); @@ -1072,13 +1107,23 @@ static int fiq_debugger_tty_init(struct fiq_debugger_state *state) goto err; } - pr_info("Registered FIQ tty driver %p\n", state->tty_driver); + tty_dev = tty_register_device(fiq_tty_driver, state->pdev->id, + &state->pdev->dev); + if (IS_ERR(tty_dev)) { + pr_err("Failed to register fiq debugger tty device\n"); + ret = PTR_ERR(tty_dev); + goto err; + } + + device_set_wakeup_capable(tty_dev, 1); + + pr_info("Registered fiq debugger ttyFIQ%d\n", state->pdev->id); + return 0; err: fiq_debugger_ringbuf_free(state->tty_rbuf); state->tty_rbuf = NULL; - put_tty_driver(state->tty_driver); return ret; } #endif @@ -1111,6 +1156,9 @@ static int fiq_debugger_probe(struct platform_device *pdev) int fiq; int uart_irq; + if (pdev->id >= MAX_FIQ_DEBUGGER_PORTS) + return -EINVAL; + if (!pdata->uart_getc || !pdata->uart_putc) return -EINVAL; if ((pdata->uart_enable && !pdata->uart_disable) || @@ -1228,8 +1276,12 @@ static int fiq_debugger_probe(struct platform_device *pdev) #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) state->console = fiq_debugger_console; + state->console.index = pdev->id; + if (!console_set_on_cmdline) + add_preferred_console(state->console.name, + state->console.index, NULL); register_console(&state->console); - fiq_debugger_tty_init(state); + fiq_debugger_tty_init_one(state); #endif return 0; @@ -1263,6 +1315,9 @@ static struct platform_driver fiq_debugger_driver = { static int __init fiq_debugger_init(void) { +#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) + fiq_debugger_tty_init(); +#endif return platform_driver_register(&fiq_debugger_driver); } -- GitLab From 2b672caf96ad6f417c83e8bfc0c4fc1a7d91132c Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 19 Jul 2012 18:40:04 -0700 Subject: [PATCH 0873/1442] ANDROID: ARM: fiq_debugger: add process context reboot command kernel_restart cannot be called from interrupt context. Add support for commands called from a work function, and implement the "reboot" command there. Also rename the existing irq-mode command to "reset" and change it to use machine_restart instead of kernel_restart. Change-Id: I3c423147c01db03d89e95a5b99096ca89462079f Signed-off-by: Colin Cross --- arch/arm/common/fiq_debugger.c | 67 +++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index ac952241fdd2..a12810b5fb68 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -81,6 +81,10 @@ struct fiq_debugger_state { atomic_t unhandled_fiq_count; bool in_fiq; + struct work_struct work; + spinlock_t work_lock; + char work_cmd[DEBUG_MAX]; + #ifdef CONFIG_FIQ_DEBUGGER_CONSOLE struct console console; struct tty_struct *tty; @@ -557,6 +561,53 @@ static void do_kgdb(struct fiq_debugger_state *state) } #endif +static void debug_schedule_work(struct fiq_debugger_state *state, char *cmd) +{ + unsigned long flags; + + spin_lock_irqsave(&state->work_lock, flags); + if (state->work_cmd[0] != '\0') { + debug_printf(state, "work command processor busy\n"); + spin_unlock_irqrestore(&state->work_lock, flags); + return; + } + + strlcpy(state->work_cmd, cmd, sizeof(state->work_cmd)); + spin_unlock_irqrestore(&state->work_lock, flags); + + schedule_work(&state->work); +} + +static void debug_work(struct work_struct *work) +{ + struct fiq_debugger_state *state; + char work_cmd[DEBUG_MAX]; + char *cmd; + unsigned long flags; + + state = container_of(work, struct fiq_debugger_state, work); + + spin_lock_irqsave(&state->work_lock, flags); + + strlcpy(work_cmd, state->work_cmd, sizeof(work_cmd)); + state->work_cmd[0] = '\0'; + + spin_unlock_irqrestore(&state->work_lock, flags); + + cmd = work_cmd; + if (!strncmp(cmd, "reboot", 6)) { + cmd += 6; + while (*cmd == ' ') + cmd++; + if (cmd != '\0') + kernel_restart(cmd); + else + kernel_restart(NULL); + } else { + debug_printf(state, "unknown work command '%s'\n", work_cmd); + } +} + /* This function CANNOT be called in FIQ context */ static void debug_irq_exec(struct fiq_debugger_state *state, char *cmd) { @@ -570,6 +621,8 @@ static void debug_irq_exec(struct fiq_debugger_state *state, char *cmd) if (!strcmp(cmd, "kgdb")) do_kgdb(state); #endif + if (!strncmp(cmd, "reboot", 6)) + debug_schedule_work(state, cmd); } static void debug_help(struct fiq_debugger_state *state) @@ -579,7 +632,8 @@ static void debug_help(struct fiq_debugger_state *state) " regs Register dump\n" " allregs Extended Register dump\n" " bt Stack trace\n" - " reboot Reboot\n" + " reboot [] Reboot with command \n" + " reset [] Hard reset with command \n" " irqs Interupt status\n" " kmsg Kernel log\n" " version Kernel version\n"); @@ -630,16 +684,16 @@ static bool debug_fiq_exec(struct fiq_debugger_state *state, dump_allregs(state, regs); } else if (!strcmp(cmd, "bt")) { dump_stacktrace(state, (struct pt_regs *)regs, 100, svc_sp); - } else if (!strncmp(cmd, "reboot", 6)) { - cmd += 6; + } else if (!strncmp(cmd, "reset", 5)) { + cmd += 5; while (*cmd == ' ') cmd++; if (*cmd) { char tmp_cmd[32]; strlcpy(tmp_cmd, cmd, sizeof(tmp_cmd)); - kernel_restart(tmp_cmd); + machine_restart(tmp_cmd); } else { - kernel_restart(NULL); + machine_restart(NULL); } } else if (!strcmp(cmd, "irqs")) { dump_irqs(state); @@ -1189,6 +1243,9 @@ static int fiq_debugger_probe(struct platform_device *pdev) state->signal_irq = platform_get_irq_byname(pdev, "signal"); state->wakeup_irq = platform_get_irq_byname(pdev, "wakeup"); + INIT_WORK(&state->work, debug_work); + spin_lock_init(&state->work_lock); + platform_set_drvdata(pdev, state); spin_lock_init(&state->sleep_timer_lock); -- GitLab From 9e789c939b90d7699ca2eb14095b80f267f72fbb Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 31 Oct 2012 17:41:39 -0700 Subject: [PATCH 0874/1442] ANDROID: ARM: fiq_debugger: lock between tty and console writes debug_console_write calls debug_uart_flush, which will usually wait until the serial port fifo empties. If another thread is continuously calling fiq_tty_write, the fifo will constantly be refilled and debug_uart_flush might never return. Add a spinlock that is locked in debug_console_write and fiq_tty_write to ensure they can't run at the same time. This has an extra advantage of preventing lines from the console and tty from being mixed together. Also reduce the size returned by fiq_tty_write_room to keep the time spent with the spinlock held to a reasonable value. In addition, make sure fiq context can't loop forever by never calling debug_uart_flush when the console is enabled. Change-Id: I5712b01f740ca0c84f680d2032c9fa16b7656939 Signed-off-by: Colin Cross --- arch/arm/common/fiq_debugger.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index a12810b5fb68..946a31403ce0 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -86,6 +86,7 @@ struct fiq_debugger_state { char work_cmd[DEBUG_MAX]; #ifdef CONFIG_FIQ_DEBUGGER_CONSOLE + spinlock_t console_lock; struct console console; struct tty_struct *tty; int tty_open_count; @@ -708,8 +709,9 @@ static bool debug_fiq_exec(struct fiq_debugger_state *state, state->no_sleep = true; debug_printf(state, "disabling sleep\n"); } else if (!strcmp(cmd, "console")) { - state->console_enable = true; debug_printf(state, "console mode\n"); + debug_uart_flush(state); + state->console_enable = true; } else if (!strcmp(cmd, "cpu")) { debug_printf(state, "cpu %d\n", state->current_cpu); } else if (!strncmp(cmd, "cpu ", 4)) { @@ -896,7 +898,8 @@ static bool debug_handle_uart_interrupt(struct fiq_debugger_state *state, } last_c = c; } - debug_uart_flush(state); + if (!state->console_enable) + debug_uart_flush(state); if (state->pdata->fiq_ack) state->pdata->fiq_ack(state->pdev, state->fiq); @@ -980,6 +983,7 @@ static void debug_console_write(struct console *co, const char *s, unsigned int count) { struct fiq_debugger_state *state; + unsigned long flags; state = container_of(co, struct fiq_debugger_state, console); @@ -987,12 +991,14 @@ static void debug_console_write(struct console *co, return; debug_uart_enable(state); + spin_lock_irqsave(&state->console_lock, flags); while (count--) { if (*s == '\n') debug_putc(state, '\r'); debug_putc(state, *s++); } debug_uart_flush(state); + spin_unlock_irqrestore(&state->console_lock, flags); debug_uart_disable(state); } @@ -1033,8 +1039,10 @@ int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) return count; debug_uart_enable(state); + spin_lock_irq(&state->console_lock); for (i = 0; i < count; i++) debug_putc(state, *buf++); + spin_unlock_irq(&state->console_lock); debug_uart_disable(state); return count; @@ -1042,7 +1050,7 @@ int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) int fiq_tty_write_room(struct tty_struct *tty) { - return 1024; + return 16; } #ifdef CONFIG_CONSOLE_POLL -- GitLab From b4ffddec349300cdd01cfd2a6f03afed6085237b Mon Sep 17 00:00:00 2001 From: Mars Date: Sat, 3 Nov 2012 12:15:38 +0800 Subject: [PATCH 0875/1442] ANDROID: ARM: fiq_debugger: fix uninitialised spin_lock. Backtrace: [] (dump_backtrace+0x0/0x10c) from [] (dump_stack+0x1) r6:c07a489c r5:c0c9b9dc r4:00000002 r3:271aed3b [] (dump_stack+0x0/0x1c) from [] (__lock_acquire+0x93) [] (__lock_acquire+0x0/0xad4) from [] (lock_acquire+0) [] (lock_acquire+0x0/0xa4) from [] (_raw_spin_lock_ir) [] (_raw_spin_lock_irq+0x0/0x5c) from [] (fiq_tty_wri) r5:e30f0000 r4:e36f0c00 [] (fiq_tty_write+0x0/0x80) from [] (n_tty_write+0x18) r8:e370fc40 r7:e378a000 r6:e3572d1c r5:e36f0c00 r4:00000002 r3:c005293c [] (n_tty_write+0x0/0x440) from [] (tty_write+0x100/0) [] (tty_write+0x0/0x2a8) from [] (vfs_write+0xa4/0x14) [] (vfs_write+0x0/0x148) from [] (sys_write+0x40/0x78) r8:00000002 r7:4076d2c4 r6:e370fc40 r5:00000000 r4:00000000 [] (sys_write+0x0/0x78) from [] (ret_fast_syscall+0x0) r8:c0041908 r7:00000004 r6:00000002 r5:00000000 r4:4007cbe0 [ccross: moved spin_lock_init into existing #ifdef] Change-Id: If400d084eb20433c126ea1dd027a6be7f2ebb1f6 Signed-off-by: Mars Signed-off-by: Colin Cross --- arch/arm/common/fiq_debugger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 946a31403ce0..053680b6c326 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -1340,6 +1340,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) handle_wakeup(state); #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) + spin_lock_init(&state->console_lock); state->console = fiq_debugger_console; state->console.index = pdev->id; if (!console_set_on_cmdline) -- GitLab From d05890d37ecc4253672fc1668d6beb2a4d6405e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 26 Nov 2012 16:23:33 -0800 Subject: [PATCH 0876/1442] ANDROID: ARM: fiq_debugger: Fix to compile on 3.7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use for_each_irq_desc in arch/arm/common/fiq_debugger.c Signed-off-by: Arve Hjønnevåg --- arch/arm/common/fiq_debugger.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 053680b6c326..5e9005bfdbb9 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -374,16 +374,17 @@ static void dump_allregs(struct fiq_debugger_state *state, unsigned *regs) static void dump_irqs(struct fiq_debugger_state *state) { int n; + struct irq_desc *desc; debug_printf(state, "irqnr total since-last status name\n"); - for (n = 0; n < NR_IRQS; n++) { - struct irqaction *act = irq_desc[n].action; + for_each_irq_desc(n, desc) { + struct irqaction *act = desc->action; if (!act && !kstat_irqs(n)) continue; debug_printf(state, "%5d: %10u %11u %8x %s\n", n, kstat_irqs(n), kstat_irqs(n) - state->last_irqs[n], - irq_desc[n].status_use_accessors, + desc->status_use_accessors, (act && act->name) ? act->name : "???"); state->last_irqs[n] = kstat_irqs(n); } -- GitLab From 9de62e0d3f24c13c18dc5ee61a029968b1598bfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 26 Nov 2012 20:05:37 -0800 Subject: [PATCH 0877/1442] ANDROID: ARM: fiq_debugger: Use kmsg_dumper to dump kernel logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arve Hjønnevåg --- arch/arm/common/fiq_debugger.c | 40 +++++++++------------------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 5e9005bfdbb9..eabd94b98f4b 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -207,29 +208,19 @@ static void debug_prompt(struct fiq_debugger_state *state) debug_puts(state, "debug> "); } -int log_buf_copy(char *dest, int idx, int len); static void dump_kernel_log(struct fiq_debugger_state *state) { - char buf[1024]; - int idx = 0; - int ret; - int saved_oip; + char buf[512]; + size_t len; + struct kmsg_dumper dumper = { .active = true }; - /* setting oops_in_progress prevents log_buf_copy() - * from trying to take a spinlock which will make it - * very unhappy in some cases... - */ - saved_oip = oops_in_progress; - oops_in_progress = 1; - for (;;) { - ret = log_buf_copy(buf, idx, 1023); - if (ret <= 0) - break; - buf[ret] = 0; + + kmsg_dump_rewind_nolock(&dumper); + while (kmsg_dump_get_line_nolock(&dumper, true, buf, + sizeof(buf) - 1, &len)) { + buf[len] = 0; debug_puts(state, buf); - idx += ret; } - oops_in_progress = saved_oip; } static char *mode_name(unsigned cpsr) @@ -523,18 +514,7 @@ static void begin_syslog_dump(struct fiq_debugger_state *state) static void end_syslog_dump(struct fiq_debugger_state *state) { - char buf[128]; - int ret; - int idx = 0; - - while (1) { - ret = log_buf_copy(buf, idx, sizeof(buf) - 1); - if (ret <= 0) - break; - buf[ret] = 0; - debug_printf(state, "%s", buf); - idx += ret; - } + dump_kernel_log(state); } #endif -- GitLab From 053d5478aa7e7342a19f29d3e0871fb94f1de862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Tue, 15 Jan 2013 15:10:31 -0800 Subject: [PATCH 0878/1442] ANDROID: ARM: fiq_debugger: Update tty code for 3.9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arve Hjønnevåg --- arch/arm/common/fiq_debugger.c | 63 ++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index eabd94b98f4b..65b943c76300 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -89,8 +89,7 @@ struct fiq_debugger_state { #ifdef CONFIG_FIQ_DEBUGGER_CONSOLE spinlock_t console_lock; struct console console; - struct tty_struct *tty; - int tty_open_count; + struct tty_port tty_port; struct fiq_debugger_ringbuf *tty_rbuf; bool syslog_dumping; #endif @@ -768,30 +767,34 @@ static irqreturn_t wakeup_irq_handler(int irq, void *dev) return IRQ_HANDLED; } - -static void debug_handle_irq_context(struct fiq_debugger_state *state) +static void debug_handle_console_irq_context(struct fiq_debugger_state *state) { - if (!state->no_sleep) { - unsigned long flags; - - spin_lock_irqsave(&state->sleep_timer_lock, flags); - wake_lock(&state->debugger_wake_lock); - mod_timer(&state->sleep_timer, jiffies + HZ * 5); - spin_unlock_irqrestore(&state->sleep_timer_lock, flags); - } #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) - if (state->tty) { + if (state->tty_port.ops) { int i; int count = fiq_debugger_ringbuf_level(state->tty_rbuf); for (i = 0; i < count; i++) { int c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0); - tty_insert_flip_char(state->tty, c, TTY_NORMAL); + tty_insert_flip_char(&state->tty_port, c, TTY_NORMAL); if (!fiq_debugger_ringbuf_consume(state->tty_rbuf, 1)) pr_warn("fiq tty failed to consume byte\n"); } - tty_flip_buffer_push(state->tty); + tty_flip_buffer_push(&state->tty_port); } #endif +} + +static void debug_handle_irq_context(struct fiq_debugger_state *state) +{ + if (!state->no_sleep) { + unsigned long flags; + + spin_lock_irqsave(&state->sleep_timer_lock, flags); + wake_lock(&state->debugger_wake_lock); + mod_timer(&state->sleep_timer, jiffies + HZ * 5); + spin_unlock_irqrestore(&state->sleep_timer_lock, flags); + } + debug_handle_console_irq_context(state); if (state->debug_busy) { debug_irq_exec(state, state->debug_cmd); if (!state->console_enable) @@ -995,26 +998,21 @@ int fiq_tty_open(struct tty_struct *tty, struct file *filp) int line = tty->index; struct fiq_debugger_state **states = tty->driver->driver_state; struct fiq_debugger_state *state = states[line]; - if (state->tty_open_count++) - return 0; - tty->driver_data = state; - state->tty = tty; - return 0; + return tty_port_open(&state->tty_port, tty, filp); } void fiq_tty_close(struct tty_struct *tty, struct file *filp) { - struct fiq_debugger_state *state = tty->driver_data; - if (--state->tty_open_count) - return; - state->tty = NULL; + tty_port_close(tty->port, tty, filp); } int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) { int i; - struct fiq_debugger_state *state = tty->driver_data; + int line = tty->index; + struct fiq_debugger_state **states = tty->driver->driver_state; + struct fiq_debugger_state *state = states[line]; if (!state->console_enable) return count; @@ -1042,7 +1040,8 @@ static int fiq_tty_poll_init(struct tty_driver *driver, int line, char *options) static int fiq_tty_poll_get_char(struct tty_driver *driver, int line) { - struct fiq_debugger_state *state = driver->ttys[line]->driver_data; + struct fiq_debugger_state **states = driver->driver_state; + struct fiq_debugger_state *state = states[line]; int c = NO_POLL_CHAR; debug_uart_enable(state); @@ -1064,13 +1063,16 @@ static int fiq_tty_poll_get_char(struct tty_driver *driver, int line) static void fiq_tty_poll_put_char(struct tty_driver *driver, int line, char ch) { - struct fiq_debugger_state *state = driver->ttys[line]->driver_data; + struct fiq_debugger_state **states = driver->driver_state; + struct fiq_debugger_state *state = states[line]; debug_uart_enable(state); debug_putc(state, ch); debug_uart_disable(state); } #endif +static const struct tty_port_operations fiq_tty_port_ops; + static const struct tty_operations fiq_tty_driver_ops = { .write = fiq_tty_write, .write_room = fiq_tty_write_room, @@ -1150,8 +1152,11 @@ static int fiq_debugger_tty_init_one(struct fiq_debugger_state *state) goto err; } - tty_dev = tty_register_device(fiq_tty_driver, state->pdev->id, - &state->pdev->dev); + tty_port_init(&state->tty_port); + state->tty_port.ops = &fiq_tty_port_ops; + + tty_dev = tty_port_register_device(&state->tty_port, fiq_tty_driver, + state->pdev->id, &state->pdev->dev); if (IS_ERR(tty_dev)) { pr_err("Failed to register fiq debugger tty device\n"); ret = PTR_ERR(tty_dev); -- GitLab From dad5647a8ca3d5897aae0845343d4ec9e415389b Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Wed, 3 Jul 2013 15:48:04 -0700 Subject: [PATCH 0879/1442] ANDROID: ARM: kgdb: ignore breakpoint instructions from user mode Avoid conflicts with user mode usage of the same instructions, as with Clang -ftrapv. Change-Id: I12d1c6d8f94376bfd2503cb0be843d7e478fb6ea Signed-off-by: Todd Poynor --- arch/arm/kernel/kgdb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 9232caee7060..f3c662299531 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -140,6 +140,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) { + if (user_mode(regs)) + return -1; kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; @@ -147,6 +149,8 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) { + if (user_mode(regs)) + return -1; compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); -- GitLab From 6c0dda2cf3d2ac04c2652cd7c2c656ff3cf77cc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 24 Jun 2013 18:02:05 -0700 Subject: [PATCH 0880/1442] ANDROID: ARM: fiq_glue: Add custom fiq return handler api. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I5ff2764e85151ca0a88576542fda07c2d33dd065 Signed-off-by: Arve Hjønnevåg --- arch/arm/common/fiq_glue.S | 25 +++++++++------ arch/arm/common/fiq_glue_setup.c | 53 ++++++++++++++++++++++++++++++-- arch/arm/include/asm/fiq_glue.h | 3 ++ 3 files changed, 69 insertions(+), 12 deletions(-) diff --git a/arch/arm/common/fiq_glue.S b/arch/arm/common/fiq_glue.S index 9e3455a09f8f..24b42cec4813 100644 --- a/arch/arm/common/fiq_glue.S +++ b/arch/arm/common/fiq_glue.S @@ -22,13 +22,14 @@ /* fiq stack: r0-r15,cpsr,spsr of interrupted mode */ ENTRY(fiq_glue) - /* store pc, cpsr from previous mode */ + /* store pc, cpsr from previous mode, reserve space for spsr */ mrs r12, spsr - sub r11, lr, #4 + sub lr, lr, #4 subs r10, #1 bne nested_fiq - stmfd sp!, {r11-r12, lr} + str r12, [sp, #-8]! + str lr, [sp, #-4]! /* store r8-r14 from previous mode */ sub sp, sp, #(7 * 4) @@ -85,12 +86,15 @@ fiq_from_usr_mode_exit: msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT) ldmfd sp!, {r0-r7} - add sp, sp, #(7 * 4) - ldmfd sp!, {r11-r12, lr} + ldr lr, [sp, #(4 * 7)] + ldr r12, [sp, #(4 * 8)] + add sp, sp, #(10 * 4) exit_fiq: msr spsr_cxsf, r12 add r10, #1 - movs pc, r11 + cmp r11, #0 + moveqs pc, lr + bx r11 /* jump to custom fiq return function */ nested_fiq: orr r12, r12, #(PSR_F_BIT) @@ -98,14 +102,17 @@ nested_fiq: fiq_glue_end: -ENTRY(fiq_glue_setup) /* func, data, sp */ - mrs r3, cpsr +ENTRY(fiq_glue_setup) /* func, data, sp, smc call number */ + stmfd sp!, {r4} + mrs r4, cpsr msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT) movs r8, r0 mov r9, r1 mov sp, r2 + mov r11, r3 moveq r10, #0 movne r10, #1 - msr cpsr_c, r3 + msr cpsr_c, r4 + ldmfd sp!, {r4} bx lr diff --git a/arch/arm/common/fiq_glue_setup.c b/arch/arm/common/fiq_glue_setup.c index 4044c7db95c8..8cb1b611c6d5 100644 --- a/arch/arm/common/fiq_glue_setup.c +++ b/arch/arm/common/fiq_glue_setup.c @@ -18,20 +18,23 @@ #include extern unsigned char fiq_glue, fiq_glue_end; -extern void fiq_glue_setup(void *func, void *data, void *sp); +extern void fiq_glue_setup(void *func, void *data, void *sp, + fiq_return_handler_t fiq_return_handler); static struct fiq_handler fiq_debbuger_fiq_handler = { .name = "fiq_glue", }; DEFINE_PER_CPU(void *, fiq_stack); static struct fiq_glue_handler *current_handler; +static fiq_return_handler_t fiq_return_handler; static DEFINE_MUTEX(fiq_glue_lock); static void fiq_glue_setup_helper(void *info) { struct fiq_glue_handler *handler = info; fiq_glue_setup(handler->fiq, handler, - __get_cpu_var(fiq_stack) + THREAD_START_SP); + __get_cpu_var(fiq_stack) + THREAD_START_SP, + fiq_return_handler); } int fiq_glue_register_handler(struct fiq_glue_handler *handler) @@ -80,6 +83,49 @@ int fiq_glue_register_handler(struct fiq_glue_handler *handler) return ret; } +static void fiq_glue_update_return_handler(void (*fiq_return)(void)) +{ + fiq_return_handler = fiq_return; + if (current_handler) + on_each_cpu(fiq_glue_setup_helper, current_handler, true); +} + +int fiq_glue_set_return_handler(void (*fiq_return)(void)) +{ + int ret; + + mutex_lock(&fiq_glue_lock); + if (fiq_return_handler) { + ret = -EBUSY; + goto err_busy; + } + fiq_glue_update_return_handler(fiq_return); + ret = 0; +err_busy: + mutex_unlock(&fiq_glue_lock); + + return ret; +} +EXPORT_SYMBOL(fiq_glue_set_return_handler); + +int fiq_glue_clear_return_handler(void (*fiq_return)(void)) +{ + int ret; + + mutex_lock(&fiq_glue_lock); + if (WARN_ON(fiq_return_handler != fiq_return)) { + ret = -EINVAL; + goto err_inval; + } + fiq_glue_update_return_handler(NULL); + ret = 0; +err_inval: + mutex_unlock(&fiq_glue_lock); + + return ret; +} +EXPORT_SYMBOL(fiq_glue_clear_return_handler); + /** * fiq_glue_resume - Restore fiqs after suspend or low power idle states * @@ -93,7 +139,8 @@ void fiq_glue_resume(void) if (!current_handler) return; fiq_glue_setup(current_handler->fiq, current_handler, - __get_cpu_var(fiq_stack) + THREAD_START_SP); + __get_cpu_var(fiq_stack) + THREAD_START_SP, + fiq_return_handler); if (current_handler->resume) current_handler->resume(current_handler); } diff --git a/arch/arm/include/asm/fiq_glue.h b/arch/arm/include/asm/fiq_glue.h index d54c29db97a8..a9e244f9f197 100644 --- a/arch/arm/include/asm/fiq_glue.h +++ b/arch/arm/include/asm/fiq_glue.h @@ -18,8 +18,11 @@ struct fiq_glue_handler { void (*fiq)(struct fiq_glue_handler *h, void *regs, void *svc_sp); void (*resume)(struct fiq_glue_handler *h); }; +typedef void (*fiq_return_handler_t)(void); int fiq_glue_register_handler(struct fiq_glue_handler *handler); +int fiq_glue_set_return_handler(fiq_return_handler_t fiq_return); +int fiq_glue_clear_return_handler(fiq_return_handler_t fiq_return); #ifdef CONFIG_FIQ_GLUE void fiq_glue_resume(void); -- GitLab From 66bbef2e39dc114c93590df35ebaedcf630eb7a2 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 2 Apr 2014 18:30:04 -0700 Subject: [PATCH 0881/1442] ANDROID: fiq_debugger: move into drivers/staging/android/fiq_debugger/ Move fiq_debugger into drivers/staging/android/fiq_debugger/ to allow for sharing between ARM and ARM64. Change-Id: I6ca5e8b7e3d000f57da3234260261c5592cef2a8 Signed-off-by: Colin Cross --- arch/arm/common/Kconfig | 46 ------------------- arch/arm/common/Makefile | 1 - drivers/staging/android/Kconfig | 2 + drivers/staging/android/Makefile | 1 + drivers/staging/android/fiq_debugger/Kconfig | 43 +++++++++++++++++ drivers/staging/android/fiq_debugger/Makefile | 1 + .../android/fiq_debugger}/fiq_debugger.c | 4 +- .../android/fiq_debugger}/fiq_debugger.h | 2 +- .../fiq_debugger}/fiq_debugger_ringbuf.h | 2 +- 9 files changed, 51 insertions(+), 51 deletions(-) create mode 100644 drivers/staging/android/fiq_debugger/Kconfig create mode 100644 drivers/staging/android/fiq_debugger/Makefile rename {arch/arm/common => drivers/staging/android/fiq_debugger}/fiq_debugger.c (99%) rename {arch/arm/include/asm => drivers/staging/android/fiq_debugger}/fiq_debugger.h (97%) rename {arch/arm/common => drivers/staging/android/fiq_debugger}/fiq_debugger_ringbuf.h (96%) diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index 992d4046bb8a..ce01364a96e3 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -21,49 +21,3 @@ config SHARP_SCOOP config FIQ_GLUE bool select FIQ - -config FIQ_DEBUGGER - bool "FIQ Mode Serial Debugger" - select FIQ - select FIQ_GLUE - default n - help - The FIQ serial debugger can accept commands even when the - kernel is unresponsive due to being stuck with interrupts - disabled. - - -config FIQ_DEBUGGER_NO_SLEEP - bool "Keep serial debugger active" - depends on FIQ_DEBUGGER - default n - help - Enables the serial debugger at boot. Passing - fiq_debugger.no_sleep on the kernel commandline will - override this config option. - -config FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON - bool "Don't disable wakeup IRQ when debugger is active" - depends on FIQ_DEBUGGER - default n - help - Don't disable the wakeup irq when enabling the uart clock. This will - cause extra interrupts, but it makes the serial debugger usable with - on some MSM radio builds that ignore the uart clock request in power - collapse. - -config FIQ_DEBUGGER_CONSOLE - bool "Console on FIQ Serial Debugger port" - depends on FIQ_DEBUGGER - default n - help - Enables a console so that printk messages are displayed on - the debugger serial port as the occur. - -config FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE - bool "Put the FIQ debugger into console mode by default" - depends on FIQ_DEBUGGER_CONSOLE - default n - help - If enabled, this puts the fiq debugger into console mode by default. - Otherwise, the fiq debugger will start out in debug mode. diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 707dcdf629d0..04aca896b338 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -4,7 +4,6 @@ obj-y += firmware.o -obj-$(CONFIG_FIQ_DEBUGGER) += fiq_debugger.o obj-$(CONFIG_FIQ_GLUE) += fiq_glue.o fiq_glue_setup.o obj-$(CONFIG_ICST) += icst.o obj-$(CONFIG_SA1111) += sa1111.o diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index e41327aad492..7bd27a42b9e8 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -35,6 +35,8 @@ config ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES source "drivers/staging/android/ion/Kconfig" +source "drivers/staging/android/fiq_debugger/Kconfig" + endif # if ANDROID endmenu diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index 7ed1be798909..21b0ff4a158a 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -1,6 +1,7 @@ ccflags-y += -I$(src) # needed for trace events obj-y += ion/ +obj-$(CONFIG_FIQ_DEBUGGER) += fiq_debugger/ obj-$(CONFIG_ASHMEM) += ashmem.o obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER) += lowmemorykiller.o diff --git a/drivers/staging/android/fiq_debugger/Kconfig b/drivers/staging/android/fiq_debugger/Kconfig new file mode 100644 index 000000000000..803fb59cc82f --- /dev/null +++ b/drivers/staging/android/fiq_debugger/Kconfig @@ -0,0 +1,43 @@ +config FIQ_DEBUGGER + bool "FIQ Mode Serial Debugger" + default n + depends on ARM + help + The FIQ serial debugger can accept commands even when the + kernel is unresponsive due to being stuck with interrupts + disabled. + +config FIQ_DEBUGGER_NO_SLEEP + bool "Keep serial debugger active" + depends on FIQ_DEBUGGER + default n + help + Enables the serial debugger at boot. Passing + fiq_debugger.no_sleep on the kernel commandline will + override this config option. + +config FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON + bool "Don't disable wakeup IRQ when debugger is active" + depends on FIQ_DEBUGGER + default n + help + Don't disable the wakeup irq when enabling the uart clock. This will + cause extra interrupts, but it makes the serial debugger usable with + on some MSM radio builds that ignore the uart clock request in power + collapse. + +config FIQ_DEBUGGER_CONSOLE + bool "Console on FIQ Serial Debugger port" + depends on FIQ_DEBUGGER + default n + help + Enables a console so that printk messages are displayed on + the debugger serial port as the occur. + +config FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE + bool "Put the FIQ debugger into console mode by default" + depends on FIQ_DEBUGGER_CONSOLE + default n + help + If enabled, this puts the fiq debugger into console mode by default. + Otherwise, the fiq debugger will start out in debug mode. diff --git a/drivers/staging/android/fiq_debugger/Makefile b/drivers/staging/android/fiq_debugger/Makefile new file mode 100644 index 000000000000..437037852963 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/Makefile @@ -0,0 +1 @@ +obj-y += fiq_debugger.o diff --git a/arch/arm/common/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c similarity index 99% rename from arch/arm/common/fiq_debugger.c rename to drivers/staging/android/fiq_debugger/fiq_debugger.c index 65b943c76300..f93d7526b87a 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -1,5 +1,5 @@ /* - * arch/arm/common/fiq_debugger.c + * drivers/staging/android/fiq_debugger.c * * Serial Debugger Interface accessed through an FIQ interrupt. * @@ -35,12 +35,12 @@ #include #include -#include #include #include #include +#include "fiq_debugger.h" #include "fiq_debugger_ringbuf.h" #define DEBUG_MAX 64 diff --git a/arch/arm/include/asm/fiq_debugger.h b/drivers/staging/android/fiq_debugger/fiq_debugger.h similarity index 97% rename from arch/arm/include/asm/fiq_debugger.h rename to drivers/staging/android/fiq_debugger/fiq_debugger.h index 4d274883ba6a..c9ec4f8db086 100644 --- a/arch/arm/include/asm/fiq_debugger.h +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.h @@ -1,5 +1,5 @@ /* - * arch/arm/include/asm/fiq_debugger.h + * drivers/staging/android/fiq_debugger/fiq_debugger.h * * Copyright (C) 2010 Google, Inc. * Author: Colin Cross diff --git a/arch/arm/common/fiq_debugger_ringbuf.h b/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h similarity index 96% rename from arch/arm/common/fiq_debugger_ringbuf.h rename to drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h index 2649b5581088..10c3c5d09098 100644 --- a/arch/arm/common/fiq_debugger_ringbuf.h +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h @@ -1,5 +1,5 @@ /* - * arch/arm/common/fiq_debugger_ringbuf.c + * drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h * * simple lockless ringbuffer * -- GitLab From a6bec1f108c4768497452af1d7145b2b031c1069 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Fri, 4 Apr 2014 22:58:23 -0700 Subject: [PATCH 0882/1442] ANDROID: fiq_debugger: rename debug->fiq_debugger Rename variables and functions in the global namespace to avoid future collisions. Change-Id: Ic23a304b0f794efc94cc6d086fddd63231d99c98 Signed-off-by: Colin Cross --- .../android/fiq_debugger/fiq_debugger.c | 405 ++++++++++-------- 1 file changed, 220 insertions(+), 185 deletions(-) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index f93d7526b87a..c3a862790239 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -124,10 +124,13 @@ module_param_named(console_enable, initial_console_enable, bool, 0644); module_param_named(kgdb_enable, fiq_kgdb_enable, bool, 0644); #ifdef CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON -static inline void enable_wakeup_irq(struct fiq_debugger_state *state) {} -static inline void disable_wakeup_irq(struct fiq_debugger_state *state) {} +static inline +void fiq_debugger_enable_wakeup_irq(struct fiq_debugger_state *state) {} +static inline +void fiq_debugger_disable_wakeup_irq(struct fiq_debugger_state *state) {} #else -static inline void enable_wakeup_irq(struct fiq_debugger_state *state) +static inline +void fiq_debugger_enable_wakeup_irq(struct fiq_debugger_state *state) { if (state->wakeup_irq < 0) return; @@ -135,7 +138,8 @@ static inline void enable_wakeup_irq(struct fiq_debugger_state *state) if (!state->wakeup_irq_no_set_wake) enable_irq_wake(state->wakeup_irq); } -static inline void disable_wakeup_irq(struct fiq_debugger_state *state) +static inline +void fiq_debugger_disable_wakeup_irq(struct fiq_debugger_state *state) { if (state->wakeup_irq < 0) return; @@ -145,16 +149,16 @@ static inline void disable_wakeup_irq(struct fiq_debugger_state *state) } #endif -static bool inline debug_have_fiq(struct fiq_debugger_state *state) +static inline bool fiq_debugger_have_fiq(struct fiq_debugger_state *state) { return (state->fiq >= 0); } -static void debug_force_irq(struct fiq_debugger_state *state) +static void fiq_debugger_force_irq(struct fiq_debugger_state *state) { unsigned int irq = state->signal_irq; - if (WARN_ON(!debug_have_fiq(state))) + if (WARN_ON(!fiq_debugger_have_fiq(state))) return; if (state->pdata->force_irq) { state->pdata->force_irq(state->pdev, irq); @@ -165,7 +169,7 @@ static void debug_force_irq(struct fiq_debugger_state *state) } } -static void debug_uart_enable(struct fiq_debugger_state *state) +static void fiq_debugger_uart_enable(struct fiq_debugger_state *state) { if (state->clk) clk_enable(state->clk); @@ -173,7 +177,7 @@ static void debug_uart_enable(struct fiq_debugger_state *state) state->pdata->uart_enable(state->pdev); } -static void debug_uart_disable(struct fiq_debugger_state *state) +static void fiq_debugger_uart_disable(struct fiq_debugger_state *state) { if (state->pdata->uart_disable) state->pdata->uart_disable(state->pdev); @@ -181,33 +185,33 @@ static void debug_uart_disable(struct fiq_debugger_state *state) clk_disable(state->clk); } -static void debug_uart_flush(struct fiq_debugger_state *state) +static void fiq_debugger_uart_flush(struct fiq_debugger_state *state) { if (state->pdata->uart_flush) state->pdata->uart_flush(state->pdev); } -static void debug_putc(struct fiq_debugger_state *state, char c) +static void fiq_debugger_putc(struct fiq_debugger_state *state, char c) { state->pdata->uart_putc(state->pdev, c); } -static void debug_puts(struct fiq_debugger_state *state, char *s) +static void fiq_debugger_puts(struct fiq_debugger_state *state, char *s) { unsigned c; while ((c = *s++)) { if (c == '\n') - debug_putc(state, '\r'); - debug_putc(state, c); + fiq_debugger_putc(state, '\r'); + fiq_debugger_putc(state, c); } } -static void debug_prompt(struct fiq_debugger_state *state) +static void fiq_debugger_prompt(struct fiq_debugger_state *state) { - debug_puts(state, "debug> "); + fiq_debugger_puts(state, "debug> "); } -static void dump_kernel_log(struct fiq_debugger_state *state) +static void fiq_debugger_dump_kernel_log(struct fiq_debugger_state *state) { char buf[512]; size_t len; @@ -218,7 +222,7 @@ static void dump_kernel_log(struct fiq_debugger_state *state) while (kmsg_dump_get_line_nolock(&dumper, true, buf, sizeof(buf) - 1, &len)) { buf[len] = 0; - debug_puts(state, buf); + fiq_debugger_puts(state, buf); } } @@ -236,7 +240,7 @@ static char *mode_name(unsigned cpsr) } } -static int debug_printf(void *cookie, const char *fmt, ...) +static int fiq_debugger_printf(void *cookie, const char *fmt, ...) { struct fiq_debugger_state *state = cookie; char buf[256]; @@ -246,12 +250,12 @@ static int debug_printf(void *cookie, const char *fmt, ...) vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); - debug_puts(state, buf); + fiq_debugger_puts(state, buf); return state->debug_abort; } /* Safe outside fiq context */ -static int debug_printf_nfiq(void *cookie, const char *fmt, ...) +static int fiq_debugger_printf_nfiq(void *cookie, const char *fmt, ...) { struct fiq_debugger_state *state = cookie; char buf[256]; @@ -263,29 +267,35 @@ static int debug_printf_nfiq(void *cookie, const char *fmt, ...) va_end(ap); local_irq_save(irq_flags); - debug_puts(state, buf); - debug_uart_flush(state); + fiq_debugger_puts(state, buf); + fiq_debugger_uart_flush(state); local_irq_restore(irq_flags); return state->debug_abort; } -static void dump_regs(struct fiq_debugger_state *state, unsigned *regs) +static void fiq_debugger_dump_regs(struct fiq_debugger_state *state, + unsigned *regs) { - debug_printf(state, " r0 %08x r1 %08x r2 %08x r3 %08x\n", + fiq_debugger_printf(state, + " r0 %08x r1 %08x r2 %08x r3 %08x\n", regs[0], regs[1], regs[2], regs[3]); - debug_printf(state, " r4 %08x r5 %08x r6 %08x r7 %08x\n", + fiq_debugger_printf(state, + " r4 %08x r5 %08x r6 %08x r7 %08x\n", regs[4], regs[5], regs[6], regs[7]); - debug_printf(state, " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n", + fiq_debugger_printf(state, + " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n", regs[8], regs[9], regs[10], regs[11], mode_name(regs[16])); if ((regs[16] & MODE_MASK) == USR_MODE) - debug_printf(state, " ip %08x sp %08x lr %08x pc %08x " - "cpsr %08x\n", regs[12], regs[13], regs[14], - regs[15], regs[16]); + fiq_debugger_printf(state, + " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n", + regs[12], regs[13], regs[14], regs[15], + regs[16]); else - debug_printf(state, " ip %08x sp %08x lr %08x pc %08x " - "cpsr %08x spsr %08x\n", regs[12], regs[13], - regs[14], regs[15], regs[16], regs[17]); + fiq_debugger_printf(state, + " ip %08x sp %08x lr %08x pc %08x cpsr %08x spsr %08x\n", + regs[12], regs[13], regs[14], regs[15], + regs[16], regs[17]); } struct mode_regs { @@ -340,38 +350,45 @@ void __naked get_mode_regs(struct mode_regs *regs) } -static void dump_allregs(struct fiq_debugger_state *state, unsigned *regs) +static void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, + unsigned *regs) { struct mode_regs mode_regs; - dump_regs(state, regs); + fiq_debugger_dump_regs(state, regs); get_mode_regs(&mode_regs); - debug_printf(state, " svc: sp %08x lr %08x spsr %08x\n", + fiq_debugger_printf(state, + " svc: sp %08x lr %08x spsr %08x\n", mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc); - debug_printf(state, " abt: sp %08x lr %08x spsr %08x\n", + fiq_debugger_printf(state, + " abt: sp %08x lr %08x spsr %08x\n", mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt); - debug_printf(state, " und: sp %08x lr %08x spsr %08x\n", + fiq_debugger_printf(state, + " und: sp %08x lr %08x spsr %08x\n", mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und); - debug_printf(state, " irq: sp %08x lr %08x spsr %08x\n", + fiq_debugger_printf(state, + " irq: sp %08x lr %08x spsr %08x\n", mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq); - debug_printf(state, " fiq: r8 %08x r9 %08x r10 %08x r11 %08x " - "r12 %08x\n", + fiq_debugger_printf(state, + " fiq: r8 %08x r9 %08x r10 %08x r11 %08x r12 %08x\n", mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq, mode_regs.r11_fiq, mode_regs.r12_fiq); - debug_printf(state, " fiq: sp %08x lr %08x spsr %08x\n", + fiq_debugger_printf(state, + " fiq: sp %08x lr %08x spsr %08x\n", mode_regs.sp_fiq, mode_regs.lr_fiq, mode_regs.spsr_fiq); } -static void dump_irqs(struct fiq_debugger_state *state) +static void fiq_debugger_dump_irqs(struct fiq_debugger_state *state) { int n; struct irq_desc *desc; - debug_printf(state, "irqnr total since-last status name\n"); + fiq_debugger_printf(state, + "irqnr total since-last status name\n"); for_each_irq_desc(n, desc) { struct irqaction *act = desc->action; if (!act && !kstat_irqs(n)) continue; - debug_printf(state, "%5d: %10u %11u %8x %s\n", n, + fiq_debugger_printf(state, "%5d: %10u %11u %8x %s\n", n, kstat_irqs(n), kstat_irqs(n) - state->last_irqs[n], desc->status_use_accessors, @@ -390,14 +407,14 @@ static int report_trace(struct stackframe *frame, void *d) struct stacktrace_state *sts = d; if (sts->depth) { - debug_printf(sts->state, + fiq_debugger_printf(sts->state, " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", frame->pc, frame->pc, frame->lr, frame->lr, frame->sp, frame->fp); sts->depth--; return 0; } - debug_printf(sts->state, " ...\n"); + fiq_debugger_printf(sts->state, " ...\n"); return sts->depth == 0; } @@ -415,16 +432,17 @@ static struct frame_tail *user_backtrace(struct fiq_debugger_state *state, /* Also check accessibility of one struct frame_tail beyond */ if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) { - debug_printf(state, " invalid frame pointer %p\n", tail); + fiq_debugger_printf(state, " invalid frame pointer %p\n", + tail); return NULL; } if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) { - debug_printf(state, + fiq_debugger_printf(state, " failed to copy frame pointer %p\n", tail); return NULL; } - debug_printf(state, " %p\n", buftail[0].lr); + fiq_debugger_printf(state, " %p\n", buftail[0].lr); /* frame pointers should strictly progress back up the stack * (towards higher addresses) */ @@ -434,7 +452,7 @@ static struct frame_tail *user_backtrace(struct fiq_debugger_state *state, return buftail[0].fp-1; } -void dump_stacktrace(struct fiq_debugger_state *state, +void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, struct pt_regs * const regs, unsigned int depth, void *ssp) { struct frame_tail *tail; @@ -446,11 +464,11 @@ void dump_stacktrace(struct fiq_debugger_state *state, *current_thread_info() = *real_thread_info; if (!current) - debug_printf(state, "current NULL\n"); + fiq_debugger_printf(state, "current NULL\n"); else - debug_printf(state, "pid: %d comm: %s\n", + fiq_debugger_printf(state, "pid: %d comm: %s\n", current->pid, current->comm); - dump_regs(state, (unsigned *)regs); + fiq_debugger_dump_regs(state, (unsigned *)regs); if (!user_mode(regs)) { struct stackframe frame; @@ -458,7 +476,7 @@ void dump_stacktrace(struct fiq_debugger_state *state, frame.sp = regs->ARM_sp; frame.lr = regs->ARM_lr; frame.pc = regs->ARM_pc; - debug_printf(state, + fiq_debugger_printf(state, " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", regs->ARM_pc, regs->ARM_pc, regs->ARM_lr, regs->ARM_lr, regs->ARM_sp, regs->ARM_fp); @@ -471,84 +489,86 @@ void dump_stacktrace(struct fiq_debugger_state *state, tail = user_backtrace(state, tail); } -static void do_ps(struct fiq_debugger_state *state) +static void fiq_debugger_do_ps(struct fiq_debugger_state *state) { struct task_struct *g; struct task_struct *p; unsigned task_state; static const char stat_nam[] = "RSDTtZX"; - debug_printf(state, "pid ppid prio task pc\n"); + fiq_debugger_printf(state, "pid ppid prio task pc\n"); read_lock(&tasklist_lock); do_each_thread(g, p) { task_state = p->state ? __ffs(p->state) + 1 : 0; - debug_printf(state, + fiq_debugger_printf(state, "%5d %5d %4d ", p->pid, p->parent->pid, p->prio); - debug_printf(state, "%-13.13s %c", p->comm, + fiq_debugger_printf(state, "%-13.13s %c", p->comm, task_state >= sizeof(stat_nam) ? '?' : stat_nam[task_state]); if (task_state == TASK_RUNNING) - debug_printf(state, " running\n"); + fiq_debugger_printf(state, " running\n"); else - debug_printf(state, " %08lx\n", thread_saved_pc(p)); + fiq_debugger_printf(state, " %08lx\n", + thread_saved_pc(p)); } while_each_thread(g, p); read_unlock(&tasklist_lock); } #ifdef CONFIG_FIQ_DEBUGGER_CONSOLE -static void begin_syslog_dump(struct fiq_debugger_state *state) +static void fiq_debugger_begin_syslog_dump(struct fiq_debugger_state *state) { state->syslog_dumping = true; } -static void end_syslog_dump(struct fiq_debugger_state *state) +static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state) { state->syslog_dumping = false; } #else extern int do_syslog(int type, char __user *bug, int count); -static void begin_syslog_dump(struct fiq_debugger_state *state) +static void fiq_debugger_begin_syslog_dump(struct fiq_debugger_state *state) { do_syslog(5 /* clear */, NULL, 0); } -static void end_syslog_dump(struct fiq_debugger_state *state) +static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state) { - dump_kernel_log(state); + fiq_debugger_dump_kernel_log(state); } #endif -static void do_sysrq(struct fiq_debugger_state *state, char rq) +static void fiq_debugger_do_sysrq(struct fiq_debugger_state *state, char rq) { if ((rq == 'g' || rq == 'G') && !fiq_kgdb_enable) { - debug_printf(state, "sysrq-g blocked\n"); + fiq_debugger_printf(state, "sysrq-g blocked\n"); return; } - begin_syslog_dump(state); + fiq_debugger_begin_syslog_dump(state); handle_sysrq(rq); - end_syslog_dump(state); + fiq_debugger_end_syslog_dump(state); } #ifdef CONFIG_KGDB -static void do_kgdb(struct fiq_debugger_state *state) +static void fiq_debugger_do_kgdb(struct fiq_debugger_state *state) { if (!fiq_kgdb_enable) { - debug_printf(state, "kgdb through fiq debugger not enabled\n"); + fiq_debugger_printf(state, "kgdb through fiq debugger not enabled\n"); return; } - debug_printf(state, "enabling console and triggering kgdb\n"); + fiq_debugger_printf(state, "enabling console and triggering kgdb\n"); state->console_enable = true; handle_sysrq('g'); } #endif -static void debug_schedule_work(struct fiq_debugger_state *state, char *cmd) +static void fiq_debugger_schedule_work(struct fiq_debugger_state *state, + char *cmd) { unsigned long flags; spin_lock_irqsave(&state->work_lock, flags); if (state->work_cmd[0] != '\0') { - debug_printf(state, "work command processor busy\n"); + fiq_debugger_printf(state, "work command processor busy\n"); spin_unlock_irqrestore(&state->work_lock, flags); return; } @@ -559,7 +579,7 @@ static void debug_schedule_work(struct fiq_debugger_state *state, char *cmd) schedule_work(&state->work); } -static void debug_work(struct work_struct *work) +static void fiq_debugger_work(struct work_struct *work) { struct fiq_debugger_state *state; char work_cmd[DEBUG_MAX]; @@ -585,30 +605,32 @@ static void debug_work(struct work_struct *work) else kernel_restart(NULL); } else { - debug_printf(state, "unknown work command '%s'\n", work_cmd); + fiq_debugger_printf(state, "unknown work command '%s'\n", + work_cmd); } } /* This function CANNOT be called in FIQ context */ -static void debug_irq_exec(struct fiq_debugger_state *state, char *cmd) +static void fiq_debugger_irq_exec(struct fiq_debugger_state *state, char *cmd) { if (!strcmp(cmd, "ps")) - do_ps(state); + fiq_debugger_do_ps(state); if (!strcmp(cmd, "sysrq")) - do_sysrq(state, 'h'); + fiq_debugger_do_sysrq(state, 'h'); if (!strncmp(cmd, "sysrq ", 6)) - do_sysrq(state, cmd[6]); + fiq_debugger_do_sysrq(state, cmd[6]); #ifdef CONFIG_KGDB if (!strcmp(cmd, "kgdb")) - do_kgdb(state); + fiq_debugger_do_kgdb(state); #endif if (!strncmp(cmd, "reboot", 6)) - debug_schedule_work(state, cmd); + fiq_debugger_schedule_work(state, cmd); } -static void debug_help(struct fiq_debugger_state *state) +static void fiq_debugger_help(struct fiq_debugger_state *state) { - debug_printf(state, "FIQ Debugger commands:\n" + fiq_debugger_printf(state, + "FIQ Debugger commands:\n" " pc PC status\n" " regs Register dump\n" " allregs Extended Register dump\n" @@ -618,20 +640,23 @@ static void debug_help(struct fiq_debugger_state *state) " irqs Interupt status\n" " kmsg Kernel log\n" " version Kernel version\n"); - debug_printf(state, " sleep Allow sleep while in FIQ\n" + fiq_debugger_printf(state, + " sleep Allow sleep while in FIQ\n" " nosleep Disable sleep while in FIQ\n" " console Switch terminal to console\n" " cpu Current CPU\n" " cpu Switch to CPU\n"); - debug_printf(state, " ps Process list\n" + fiq_debugger_printf(state, + " ps Process list\n" " sysrq sysrq options\n" " sysrq Execute sysrq with \n"); #ifdef CONFIG_KGDB - debug_printf(state, " kgdb Enter kernel debugger\n"); + fiq_debugger_printf(state, + " kgdb Enter kernel debugger\n"); #endif } -static void take_affinity(void *info) +static void fiq_debugger_take_affinity(void *info) { struct fiq_debugger_state *state = info; struct cpumask cpumask; @@ -642,29 +667,31 @@ static void take_affinity(void *info) irq_set_affinity(state->uart_irq, &cpumask); } -static void switch_cpu(struct fiq_debugger_state *state, int cpu) +static void fiq_debugger_switch_cpu(struct fiq_debugger_state *state, int cpu) { - if (!debug_have_fiq(state)) - smp_call_function_single(cpu, take_affinity, state, false); + if (!fiq_debugger_have_fiq(state)) + smp_call_function_single(cpu, fiq_debugger_take_affinity, state, + false); state->current_cpu = cpu; } -static bool debug_fiq_exec(struct fiq_debugger_state *state, +static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state, const char *cmd, unsigned *regs, void *svc_sp) { bool signal_helper = false; if (!strcmp(cmd, "help") || !strcmp(cmd, "?")) { - debug_help(state); + fiq_debugger_help(state); } else if (!strcmp(cmd, "pc")) { - debug_printf(state, " pc %08x cpsr %08x mode %s\n", + fiq_debugger_printf(state, " pc %08x cpsr %08x mode %s\n", regs[15], regs[16], mode_name(regs[16])); } else if (!strcmp(cmd, "regs")) { - dump_regs(state, regs); + fiq_debugger_dump_regs(state, regs); } else if (!strcmp(cmd, "allregs")) { - dump_allregs(state, regs); + fiq_debugger_dump_allregs(state, regs); } else if (!strcmp(cmd, "bt")) { - dump_stacktrace(state, (struct pt_regs *)regs, 100, svc_sp); + fiq_debugger_dump_stacktrace(state, (struct pt_regs *)regs, 100, + svc_sp); } else if (!strncmp(cmd, "reset", 5)) { cmd += 5; while (*cmd == ' ') @@ -677,33 +704,33 @@ static bool debug_fiq_exec(struct fiq_debugger_state *state, machine_restart(NULL); } } else if (!strcmp(cmd, "irqs")) { - dump_irqs(state); + fiq_debugger_dump_irqs(state); } else if (!strcmp(cmd, "kmsg")) { - dump_kernel_log(state); + fiq_debugger_dump_kernel_log(state); } else if (!strcmp(cmd, "version")) { - debug_printf(state, "%s\n", linux_banner); + fiq_debugger_printf(state, "%s\n", linux_banner); } else if (!strcmp(cmd, "sleep")) { state->no_sleep = false; - debug_printf(state, "enabling sleep\n"); + fiq_debugger_printf(state, "enabling sleep\n"); } else if (!strcmp(cmd, "nosleep")) { state->no_sleep = true; - debug_printf(state, "disabling sleep\n"); + fiq_debugger_printf(state, "disabling sleep\n"); } else if (!strcmp(cmd, "console")) { - debug_printf(state, "console mode\n"); - debug_uart_flush(state); + fiq_debugger_printf(state, "console mode\n"); + fiq_debugger_uart_flush(state); state->console_enable = true; } else if (!strcmp(cmd, "cpu")) { - debug_printf(state, "cpu %d\n", state->current_cpu); + fiq_debugger_printf(state, "cpu %d\n", state->current_cpu); } else if (!strncmp(cmd, "cpu ", 4)) { unsigned long cpu = 0; if (strict_strtoul(cmd + 4, 10, &cpu) == 0) - switch_cpu(state, cpu); + fiq_debugger_switch_cpu(state, cpu); else - debug_printf(state, "invalid cpu\n"); - debug_printf(state, "cpu %d\n", state->current_cpu); + fiq_debugger_printf(state, "invalid cpu\n"); + fiq_debugger_printf(state, "cpu %d\n", state->current_cpu); } else { if (state->debug_busy) { - debug_printf(state, + fiq_debugger_printf(state, "command processor busy. trying to abort.\n"); state->debug_abort = -1; } else { @@ -714,12 +741,12 @@ static bool debug_fiq_exec(struct fiq_debugger_state *state, return true; } if (!state->console_enable) - debug_prompt(state); + fiq_debugger_prompt(state); return signal_helper; } -static void sleep_timer_expired(unsigned long data) +static void fiq_debugger_sleep_timer_expired(unsigned long data) { struct fiq_debugger_state *state = (struct fiq_debugger_state *)data; unsigned long flags; @@ -728,18 +755,19 @@ static void sleep_timer_expired(unsigned long data) if (state->uart_enabled && !state->no_sleep) { if (state->debug_enable && !state->console_enable) { state->debug_enable = false; - debug_printf_nfiq(state, "suspending fiq debugger\n"); + fiq_debugger_printf_nfiq(state, + "suspending fiq debugger\n"); } state->ignore_next_wakeup_irq = true; - debug_uart_disable(state); + fiq_debugger_uart_disable(state); state->uart_enabled = false; - enable_wakeup_irq(state); + fiq_debugger_enable_wakeup_irq(state); } wake_unlock(&state->debugger_wake_lock); spin_unlock_irqrestore(&state->sleep_timer_lock, flags); } -static void handle_wakeup(struct fiq_debugger_state *state) +static void fiq_debugger_handle_wakeup(struct fiq_debugger_state *state) { unsigned long flags; @@ -748,26 +776,27 @@ static void handle_wakeup(struct fiq_debugger_state *state) state->ignore_next_wakeup_irq = false; } else if (!state->uart_enabled) { wake_lock(&state->debugger_wake_lock); - debug_uart_enable(state); + fiq_debugger_uart_enable(state); state->uart_enabled = true; - disable_wakeup_irq(state); + fiq_debugger_disable_wakeup_irq(state); mod_timer(&state->sleep_timer, jiffies + HZ / 2); } spin_unlock_irqrestore(&state->sleep_timer_lock, flags); } -static irqreturn_t wakeup_irq_handler(int irq, void *dev) +static irqreturn_t fiq_debugger_wakeup_irq_handler(int irq, void *dev) { struct fiq_debugger_state *state = dev; if (!state->no_sleep) - debug_puts(state, "WAKEUP\n"); - handle_wakeup(state); + fiq_debugger_puts(state, "WAKEUP\n"); + fiq_debugger_handle_wakeup(state); return IRQ_HANDLED; } -static void debug_handle_console_irq_context(struct fiq_debugger_state *state) +static +void fiq_debugger_handle_console_irq_context(struct fiq_debugger_state *state) { #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) if (state->tty_port.ops) { @@ -784,7 +813,7 @@ static void debug_handle_console_irq_context(struct fiq_debugger_state *state) #endif } -static void debug_handle_irq_context(struct fiq_debugger_state *state) +static void fiq_debugger_handle_irq_context(struct fiq_debugger_state *state) { if (!state->no_sleep) { unsigned long flags; @@ -794,22 +823,22 @@ static void debug_handle_irq_context(struct fiq_debugger_state *state) mod_timer(&state->sleep_timer, jiffies + HZ * 5); spin_unlock_irqrestore(&state->sleep_timer_lock, flags); } - debug_handle_console_irq_context(state); + fiq_debugger_handle_console_irq_context(state); if (state->debug_busy) { - debug_irq_exec(state, state->debug_cmd); + fiq_debugger_irq_exec(state, state->debug_cmd); if (!state->console_enable) - debug_prompt(state); + fiq_debugger_prompt(state); state->debug_busy = 0; } } -static int debug_getc(struct fiq_debugger_state *state) +static int fiq_debugger_getc(struct fiq_debugger_state *state) { return state->pdata->uart_getc(state->pdev); } -static bool debug_handle_uart_interrupt(struct fiq_debugger_state *state, - int this_cpu, void *regs, void *svc_sp) +static bool fiq_debugger_handle_uart_interrupt(struct fiq_debugger_state *state, + int this_cpu, struct pt_regs *regs, void *svc_sp) { int c; static int last_c; @@ -824,30 +853,31 @@ static bool debug_handle_uart_interrupt(struct fiq_debugger_state *state, MAX_UNHANDLED_FIQ_COUNT) return false; - debug_printf(state, "fiq_debugger: cpu %d not responding, " + fiq_debugger_printf(state, + "fiq_debugger: cpu %d not responding, " "reverting to cpu %d\n", state->current_cpu, this_cpu); atomic_set(&state->unhandled_fiq_count, 0); - switch_cpu(state, this_cpu); + fiq_debugger_switch_cpu(state, this_cpu); return false; } state->in_fiq = true; - while ((c = debug_getc(state)) != FIQ_DEBUGGER_NO_CHAR) { + while ((c = fiq_debugger_getc(state)) != FIQ_DEBUGGER_NO_CHAR) { count++; if (!state->debug_enable) { if ((c == 13) || (c == 10)) { state->debug_enable = true; state->debug_count = 0; - debug_prompt(state); + fiq_debugger_prompt(state); } } else if (c == FIQ_DEBUGGER_BREAK) { state->console_enable = false; - debug_puts(state, "fiq debugger mode\n"); + fiq_debugger_puts(state, "fiq debugger mode\n"); state->debug_count = 0; - debug_prompt(state); + fiq_debugger_prompt(state); #ifdef CONFIG_FIQ_DEBUGGER_CONSOLE } else if (state->console_enable && state->tty_rbuf) { fiq_debugger_ringbuf_push(state->tty_rbuf, c); @@ -856,34 +886,35 @@ static bool debug_handle_uart_interrupt(struct fiq_debugger_state *state, } else if ((c >= ' ') && (c < 127)) { if (state->debug_count < (DEBUG_MAX - 1)) { state->debug_buf[state->debug_count++] = c; - debug_putc(state, c); + fiq_debugger_putc(state, c); } } else if ((c == 8) || (c == 127)) { if (state->debug_count > 0) { state->debug_count--; - debug_putc(state, 8); - debug_putc(state, ' '); - debug_putc(state, 8); + fiq_debugger_putc(state, 8); + fiq_debugger_putc(state, ' '); + fiq_debugger_putc(state, 8); } } else if ((c == 13) || (c == 10)) { if (c == '\r' || (c == '\n' && last_c != '\r')) { - debug_putc(state, '\r'); - debug_putc(state, '\n'); + fiq_debugger_putc(state, '\r'); + fiq_debugger_putc(state, '\n'); } if (state->debug_count) { state->debug_buf[state->debug_count] = 0; state->debug_count = 0; signal_helper |= - debug_fiq_exec(state, state->debug_buf, - regs, svc_sp); + fiq_debugger_fiq_exec(state, + state->debug_buf, + regs, svc_sp); } else { - debug_prompt(state); + fiq_debugger_prompt(state); } } last_c = c; } if (!state->console_enable) - debug_uart_flush(state); + fiq_debugger_uart_flush(state); if (state->pdata->fiq_ack) state->pdata->fiq_ack(state->pdev, state->fiq); @@ -897,16 +928,18 @@ static bool debug_handle_uart_interrupt(struct fiq_debugger_state *state, return signal_helper; } -static void debug_fiq(struct fiq_glue_handler *h, void *regs, void *svc_sp) +static void fiq_debugger_fiq(struct fiq_glue_handler *h, void *regs, + void *svc_sp) { struct fiq_debugger_state *state = container_of(h, struct fiq_debugger_state, handler); unsigned int this_cpu = THREAD_INFO(svc_sp)->cpu; bool need_irq; - need_irq = debug_handle_uart_interrupt(state, this_cpu, regs, svc_sp); + need_irq = fiq_debugger_handle_uart_interrupt(state, this_cpu, regs, + svc_sp); if (need_irq) - debug_force_irq(state); + fiq_debugger_force_irq(state); } /* @@ -914,19 +947,19 @@ static void debug_fiq(struct fiq_glue_handler *h, void *regs, void *svc_sp) * This just effectively takes over the UART interrupt and does all the work * in this context. */ -static irqreturn_t debug_uart_irq(int irq, void *dev) +static irqreturn_t fiq_debugger_uart_irq(int irq, void *dev) { struct fiq_debugger_state *state = dev; bool not_done; - handle_wakeup(state); + fiq_debugger_handle_wakeup(state); /* handle the debugger irq in regular context */ - not_done = debug_handle_uart_interrupt(state, smp_processor_id(), + not_done = fiq_debugger_handle_uart_interrupt(state, smp_processor_id(), get_irq_regs(), current_thread_info()); if (not_done) - debug_handle_irq_context(state); + fiq_debugger_handle_irq_context(state); return IRQ_HANDLED; } @@ -936,19 +969,19 @@ static irqreturn_t debug_uart_irq(int irq, void *dev) * FIQ handler does what it can and then signals this interrupt to finish the * job in irq context. */ -static irqreturn_t debug_signal_irq(int irq, void *dev) +static irqreturn_t fiq_debugger_signal_irq(int irq, void *dev) { struct fiq_debugger_state *state = dev; if (state->pdata->force_irq_ack) state->pdata->force_irq_ack(state->pdev, state->signal_irq); - debug_handle_irq_context(state); + fiq_debugger_handle_irq_context(state); return IRQ_HANDLED; } -static void debug_resume(struct fiq_glue_handler *h) +static void fiq_debugger_resume(struct fiq_glue_handler *h) { struct fiq_debugger_state *state = container_of(h, struct fiq_debugger_state, handler); @@ -957,13 +990,13 @@ static void debug_resume(struct fiq_glue_handler *h) } #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) -struct tty_driver *debug_console_device(struct console *co, int *index) +struct tty_driver *fiq_debugger_console_device(struct console *co, int *index) { *index = co->index; return fiq_tty_driver; } -static void debug_console_write(struct console *co, +static void fiq_debugger_console_write(struct console *co, const char *s, unsigned int count) { struct fiq_debugger_state *state; @@ -974,22 +1007,22 @@ static void debug_console_write(struct console *co, if (!state->console_enable && !state->syslog_dumping) return; - debug_uart_enable(state); + fiq_debugger_uart_enable(state); spin_lock_irqsave(&state->console_lock, flags); while (count--) { if (*s == '\n') - debug_putc(state, '\r'); - debug_putc(state, *s++); + fiq_debugger_putc(state, '\r'); + fiq_debugger_putc(state, *s++); } - debug_uart_flush(state); + fiq_debugger_uart_flush(state); spin_unlock_irqrestore(&state->console_lock, flags); - debug_uart_disable(state); + fiq_debugger_uart_disable(state); } static struct console fiq_debugger_console = { .name = "ttyFIQ", - .device = debug_console_device, - .write = debug_console_write, + .device = fiq_debugger_console_device, + .write = fiq_debugger_console_write, .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_ENABLED, }; @@ -1017,12 +1050,12 @@ int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) if (!state->console_enable) return count; - debug_uart_enable(state); + fiq_debugger_uart_enable(state); spin_lock_irq(&state->console_lock); for (i = 0; i < count; i++) - debug_putc(state, *buf++); + fiq_debugger_putc(state, *buf++); spin_unlock_irq(&state->console_lock); - debug_uart_disable(state); + fiq_debugger_uart_disable(state); return count; } @@ -1044,19 +1077,19 @@ static int fiq_tty_poll_get_char(struct tty_driver *driver, int line) struct fiq_debugger_state *state = states[line]; int c = NO_POLL_CHAR; - debug_uart_enable(state); - if (debug_have_fiq(state)) { + fiq_debugger_uart_enable(state); + if (fiq_debugger_have_fiq(state)) { int count = fiq_debugger_ringbuf_level(state->tty_rbuf); if (count > 0) { c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0); fiq_debugger_ringbuf_consume(state->tty_rbuf, 1); } } else { - c = debug_getc(state); + c = fiq_debugger_getc(state); if (c == FIQ_DEBUGGER_NO_CHAR) c = NO_POLL_CHAR; } - debug_uart_disable(state); + fiq_debugger_uart_disable(state); return c; } @@ -1065,9 +1098,9 @@ static void fiq_tty_poll_put_char(struct tty_driver *driver, int line, char ch) { struct fiq_debugger_state **states = driver->driver_state; struct fiq_debugger_state *state = states[line]; - debug_uart_enable(state); - debug_putc(state, ch); - debug_uart_disable(state); + fiq_debugger_uart_enable(state); + fiq_debugger_putc(state, ch); + fiq_debugger_uart_disable(state); } #endif @@ -1224,7 +1257,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) return -EINVAL; state = kzalloc(sizeof(*state), GFP_KERNEL); - setup_timer(&state->sleep_timer, sleep_timer_expired, + setup_timer(&state->sleep_timer, fiq_debugger_sleep_timer_expired, (unsigned long)state); state->pdata = pdata; state->pdev = pdev; @@ -1237,14 +1270,14 @@ static int fiq_debugger_probe(struct platform_device *pdev) state->signal_irq = platform_get_irq_byname(pdev, "signal"); state->wakeup_irq = platform_get_irq_byname(pdev, "wakeup"); - INIT_WORK(&state->work, debug_work); + INIT_WORK(&state->work, fiq_debugger_work); spin_lock_init(&state->work_lock); platform_set_drvdata(pdev, state); spin_lock_init(&state->sleep_timer_lock); - if (state->wakeup_irq < 0 && debug_have_fiq(state)) + if (state->wakeup_irq < 0 && fiq_debugger_have_fiq(state)) state->no_sleep = true; state->ignore_next_wakeup_irq = !state->no_sleep; @@ -1268,12 +1301,13 @@ static int fiq_debugger_probe(struct platform_device *pdev) goto err_uart_init; } - debug_printf_nfiq(state, "\n", + fiq_debugger_printf_nfiq(state, + "\n", state->no_sleep ? "" : "twice "); - if (debug_have_fiq(state)) { - state->handler.fiq = debug_fiq; - state->handler.resume = debug_resume; + if (fiq_debugger_have_fiq(state)) { + state->handler.fiq = fiq_debugger_fiq; + state->handler.resume = fiq_debugger_resume; ret = fiq_glue_register_handler(&state->handler); if (ret) { pr_err("%s: could not install fiq handler\n", __func__); @@ -1282,7 +1316,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) pdata->fiq_enable(pdev, state->fiq, 1); } else { - ret = request_irq(state->uart_irq, debug_uart_irq, + ret = request_irq(state->uart_irq, fiq_debugger_uart_irq, IRQF_NO_SUSPEND, "debug", state); if (ret) { pr_err("%s: could not install irq handler\n", __func__); @@ -1299,14 +1333,15 @@ static int fiq_debugger_probe(struct platform_device *pdev) clk_disable(state->clk); if (state->signal_irq >= 0) { - ret = request_irq(state->signal_irq, debug_signal_irq, + ret = request_irq(state->signal_irq, fiq_debugger_signal_irq, IRQF_TRIGGER_RISING, "debug-signal", state); if (ret) pr_err("serial_debugger: could not install signal_irq"); } if (state->wakeup_irq >= 0) { - ret = request_irq(state->wakeup_irq, wakeup_irq_handler, + ret = request_irq(state->wakeup_irq, + fiq_debugger_wakeup_irq_handler, IRQF_TRIGGER_FALLING | IRQF_DISABLED, "debug-wakeup", state); if (ret) { @@ -1323,7 +1358,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) } } if (state->no_sleep) - handle_wakeup(state); + fiq_debugger_handle_wakeup(state); #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) spin_lock_init(&state->console_lock); -- GitLab From cbee9153275df6b20dd20e94fa4c1d72e448d127 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 2 Apr 2014 18:37:29 -0700 Subject: [PATCH 0883/1442] ANDROID: fiq_debugger: allow compiling without CONFIG_FIQ_GLUE Allow compiling fiq_debugger.c without CONFIG_FIQ_GLUE for platforms that don't support FIQs. Change-Id: Iabdfd790d24fa9d47b29d2f850c567af2dcad78f Signed-off-by: Colin Cross --- .../android/fiq_debugger/fiq_debugger.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index c3a862790239..4c392729fc68 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -35,7 +35,9 @@ #include #include +#ifdef CONFIG_FIQ_GLUE #include +#endif #include #include @@ -52,7 +54,9 @@ ((unsigned long)(sp) & ~(THREAD_SIZE - 1))) struct fiq_debugger_state { +#ifdef CONFIG_FIQ_GLUE struct fiq_glue_handler handler; +#endif int fiq; int uart_irq; @@ -154,6 +158,7 @@ static inline bool fiq_debugger_have_fiq(struct fiq_debugger_state *state) return (state->fiq >= 0); } +#ifdef CONFIG_FIQ_GLUE static void fiq_debugger_force_irq(struct fiq_debugger_state *state) { unsigned int irq = state->signal_irq; @@ -168,6 +173,7 @@ static void fiq_debugger_force_irq(struct fiq_debugger_state *state) chip->irq_retrigger(irq_get_irq_data(irq)); } } +#endif static void fiq_debugger_uart_enable(struct fiq_debugger_state *state) { @@ -928,6 +934,7 @@ static bool fiq_debugger_handle_uart_interrupt(struct fiq_debugger_state *state, return signal_helper; } +#ifdef CONFIG_FIQ_GLUE static void fiq_debugger_fiq(struct fiq_glue_handler *h, void *regs, void *svc_sp) { @@ -941,6 +948,7 @@ static void fiq_debugger_fiq(struct fiq_glue_handler *h, void *regs, if (need_irq) fiq_debugger_force_irq(state); } +#endif /* * When not using FIQs, we only use this single interrupt as an entry point. @@ -981,6 +989,7 @@ static irqreturn_t fiq_debugger_signal_irq(int irq, void *dev) return IRQ_HANDLED; } +#ifdef CONFIG_FIQ_GLUE static void fiq_debugger_resume(struct fiq_glue_handler *h) { struct fiq_debugger_state *state = @@ -988,6 +997,7 @@ static void fiq_debugger_resume(struct fiq_glue_handler *h) if (state->pdata->uart_resume) state->pdata->uart_resume(state->pdev); } +#endif #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) struct tty_driver *fiq_debugger_console_device(struct console *co, int *index) @@ -1305,17 +1315,20 @@ static int fiq_debugger_probe(struct platform_device *pdev) "\n", state->no_sleep ? "" : "twice "); +#ifdef CONFIG_FIQ_GLUE if (fiq_debugger_have_fiq(state)) { state->handler.fiq = fiq_debugger_fiq; state->handler.resume = fiq_debugger_resume; ret = fiq_glue_register_handler(&state->handler); if (ret) { pr_err("%s: could not install fiq handler\n", __func__); - goto err_register_fiq; + goto err_register_irq; } pdata->fiq_enable(pdev, state->fiq, 1); - } else { + } else +#endif + { ret = request_irq(state->uart_irq, fiq_debugger_uart_irq, IRQF_NO_SUSPEND, "debug", state); if (ret) { @@ -1373,7 +1386,6 @@ static int fiq_debugger_probe(struct platform_device *pdev) return 0; err_register_irq: -err_register_fiq: if (pdata->uart_free) pdata->uart_free(pdev); err_uart_init: -- GitLab From 7efcbb80efa474e6ebd5182460f09322f88cfa8c Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Fri, 4 Apr 2014 17:05:19 -0700 Subject: [PATCH 0884/1442] ANDROID: fiq_debugger: use pt_regs for registers IRQ mode already passes in a struct pt_regs from get_irq_regs(). FIQ mode passes in something similar but not identical to a struct pt_regs - FIQ mode stores the spsr of the interrupted mode in slot 17, while pt_regs expects orig_r0. Replace the existing mixture of void *regs, unsigned *regs, and struct pt_regs * const with const struct pt_regs *. Modify dump_regs not to print the spsr since it won't be there in a struct pt_regs anyways. Modify dump_allregs to highlight the mode that was interrupted, making spsr easy to find there. Change-Id: Ibfe1723d702306c7605fd071737d7be9ee9d8c12 Signed-off-by: Colin Cross --- .../android/fiq_debugger/fiq_debugger.c | 63 ++++++++++--------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index 4c392729fc68..d660a46e7893 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -280,28 +280,22 @@ static int fiq_debugger_printf_nfiq(void *cookie, const char *fmt, ...) } static void fiq_debugger_dump_regs(struct fiq_debugger_state *state, - unsigned *regs) + const struct pt_regs *regs) { fiq_debugger_printf(state, " r0 %08x r1 %08x r2 %08x r3 %08x\n", - regs[0], regs[1], regs[2], regs[3]); + regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); fiq_debugger_printf(state, " r4 %08x r5 %08x r6 %08x r7 %08x\n", - regs[4], regs[5], regs[6], regs[7]); + regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7); fiq_debugger_printf(state, " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n", - regs[8], regs[9], regs[10], regs[11], - mode_name(regs[16])); - if ((regs[16] & MODE_MASK) == USR_MODE) - fiq_debugger_printf(state, - " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n", - regs[12], regs[13], regs[14], regs[15], - regs[16]); - else - fiq_debugger_printf(state, - " ip %08x sp %08x lr %08x pc %08x cpsr %08x spsr %08x\n", - regs[12], regs[13], regs[14], regs[15], - regs[16], regs[17]); + regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp, + mode_name(regs->ARM_cpsr)); + fiq_debugger_printf(state, + " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n", + regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc, + regs->ARM_cpsr); } struct mode_regs { @@ -357,25 +351,33 @@ void __naked get_mode_regs(struct mode_regs *regs) static void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, - unsigned *regs) + const struct pt_regs *regs) { struct mode_regs mode_regs; + unsigned long mode = regs->ARM_cpsr & MODE_MASK; + fiq_debugger_dump_regs(state, regs); get_mode_regs(&mode_regs); + fiq_debugger_printf(state, - " svc: sp %08x lr %08x spsr %08x\n", + "%csvc: sp %08x lr %08x spsr %08x\n", + mode == SVC_MODE ? '*' : ' ', mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc); fiq_debugger_printf(state, - " abt: sp %08x lr %08x spsr %08x\n", + "%cabt: sp %08x lr %08x spsr %08x\n", + mode == ABT_MODE ? '*' : ' ', mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt); fiq_debugger_printf(state, - " und: sp %08x lr %08x spsr %08x\n", + "%cund: sp %08x lr %08x spsr %08x\n", + mode == UND_MODE ? '*' : ' ', mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und); fiq_debugger_printf(state, - " irq: sp %08x lr %08x spsr %08x\n", + "%cirq: sp %08x lr %08x spsr %08x\n", + mode == IRQ_MODE ? '*' : ' ', mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq); fiq_debugger_printf(state, - " fiq: r8 %08x r9 %08x r10 %08x r11 %08x r12 %08x\n", + "%cfiq: r8 %08x r9 %08x r10 %08x r11 %08x r12 %08x\n", + mode == FIQ_MODE ? '*' : ' ', mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq, mode_regs.r11_fiq, mode_regs.r12_fiq); fiq_debugger_printf(state, @@ -459,7 +461,7 @@ static struct frame_tail *user_backtrace(struct fiq_debugger_state *state, } void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, - struct pt_regs * const regs, unsigned int depth, void *ssp) + const struct pt_regs *regs, unsigned int depth, void *ssp) { struct frame_tail *tail; struct thread_info *real_thread_info = THREAD_INFO(ssp); @@ -474,7 +476,7 @@ void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, else fiq_debugger_printf(state, "pid: %d comm: %s\n", current->pid, current->comm); - fiq_debugger_dump_regs(state, (unsigned *)regs); + fiq_debugger_dump_regs(state, regs); if (!user_mode(regs)) { struct stackframe frame; @@ -682,7 +684,8 @@ static void fiq_debugger_switch_cpu(struct fiq_debugger_state *state, int cpu) } static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state, - const char *cmd, unsigned *regs, void *svc_sp) + const char *cmd, const struct pt_regs *regs, + void *svc_sp) { bool signal_helper = false; @@ -690,14 +693,14 @@ static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state, fiq_debugger_help(state); } else if (!strcmp(cmd, "pc")) { fiq_debugger_printf(state, " pc %08x cpsr %08x mode %s\n", - regs[15], regs[16], mode_name(regs[16])); + regs->ARM_pc, regs->ARM_cpsr, + mode_name(regs->ARM_cpsr)); } else if (!strcmp(cmd, "regs")) { fiq_debugger_dump_regs(state, regs); } else if (!strcmp(cmd, "allregs")) { fiq_debugger_dump_allregs(state, regs); } else if (!strcmp(cmd, "bt")) { - fiq_debugger_dump_stacktrace(state, (struct pt_regs *)regs, 100, - svc_sp); + fiq_debugger_dump_stacktrace(state, regs, 100, svc_sp); } else if (!strncmp(cmd, "reset", 5)) { cmd += 5; while (*cmd == ' ') @@ -844,7 +847,7 @@ static int fiq_debugger_getc(struct fiq_debugger_state *state) } static bool fiq_debugger_handle_uart_interrupt(struct fiq_debugger_state *state, - int this_cpu, struct pt_regs *regs, void *svc_sp) + int this_cpu, const struct pt_regs *regs, void *svc_sp) { int c; static int last_c; @@ -935,8 +938,8 @@ static bool fiq_debugger_handle_uart_interrupt(struct fiq_debugger_state *state, } #ifdef CONFIG_FIQ_GLUE -static void fiq_debugger_fiq(struct fiq_glue_handler *h, void *regs, - void *svc_sp) +static void fiq_debugger_fiq(struct fiq_glue_handler *h, + const struct pt_regs *regs, void *svc_sp) { struct fiq_debugger_state *state = container_of(h, struct fiq_debugger_state, handler); -- GitLab From cffbe78e015d823b22cbc1f667a3dd377af55e12 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 2 Apr 2014 18:42:13 -0700 Subject: [PATCH 0885/1442] ANDROID: fiq_debugger: split arm support into fiq_debugger_arm.c Split arm support into a separate .c file that is only built for CONFIG_ARM. Change-Id: Iba16f4d51608bf9c3e5c8acefefcd38fead9797c Signed-off-by: Colin Cross --- drivers/staging/android/fiq_debugger/Makefile | 1 + .../android/fiq_debugger/fiq_debugger.c | 223 +--------------- .../android/fiq_debugger/fiq_debugger_arm.c | 240 ++++++++++++++++++ .../android/fiq_debugger/fiq_debugger_priv.h | 36 +++ 4 files changed, 280 insertions(+), 220 deletions(-) create mode 100644 drivers/staging/android/fiq_debugger/fiq_debugger_arm.c create mode 100644 drivers/staging/android/fiq_debugger/fiq_debugger_priv.h diff --git a/drivers/staging/android/fiq_debugger/Makefile b/drivers/staging/android/fiq_debugger/Makefile index 437037852963..1e203aed8c62 100644 --- a/drivers/staging/android/fiq_debugger/Makefile +++ b/drivers/staging/android/fiq_debugger/Makefile @@ -1 +1,2 @@ obj-y += fiq_debugger.o +obj-$(CONFIG_ARM) += fiq_debugger_arm.o diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index d660a46e7893..5516d3113b94 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -38,11 +38,11 @@ #ifdef CONFIG_FIQ_GLUE #include #endif -#include #include #include "fiq_debugger.h" +#include "fiq_debugger_priv.h" #include "fiq_debugger_ringbuf.h" #define DEBUG_MAX 64 @@ -50,9 +50,6 @@ #define MAX_FIQ_DEBUGGER_PORTS 4 -#define THREAD_INFO(sp) ((struct thread_info *) \ - ((unsigned long)(sp) & ~(THREAD_SIZE - 1))) - struct fiq_debugger_state { #ifdef CONFIG_FIQ_GLUE struct fiq_glue_handler handler; @@ -232,21 +229,7 @@ static void fiq_debugger_dump_kernel_log(struct fiq_debugger_state *state) } } -static char *mode_name(unsigned cpsr) -{ - switch (cpsr & MODE_MASK) { - case USR_MODE: return "USR"; - case FIQ_MODE: return "FIQ"; - case IRQ_MODE: return "IRQ"; - case SVC_MODE: return "SVC"; - case ABT_MODE: return "ABT"; - case UND_MODE: return "UND"; - case SYSTEM_MODE: return "SYS"; - default: return "???"; - } -} - -static int fiq_debugger_printf(void *cookie, const char *fmt, ...) +int fiq_debugger_printf(void *cookie, const char *fmt, ...) { struct fiq_debugger_state *state = cookie; char buf[256]; @@ -279,112 +262,6 @@ static int fiq_debugger_printf_nfiq(void *cookie, const char *fmt, ...) return state->debug_abort; } -static void fiq_debugger_dump_regs(struct fiq_debugger_state *state, - const struct pt_regs *regs) -{ - fiq_debugger_printf(state, - " r0 %08x r1 %08x r2 %08x r3 %08x\n", - regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); - fiq_debugger_printf(state, - " r4 %08x r5 %08x r6 %08x r7 %08x\n", - regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7); - fiq_debugger_printf(state, - " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n", - regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp, - mode_name(regs->ARM_cpsr)); - fiq_debugger_printf(state, - " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n", - regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc, - regs->ARM_cpsr); -} - -struct mode_regs { - unsigned long sp_svc; - unsigned long lr_svc; - unsigned long spsr_svc; - - unsigned long sp_abt; - unsigned long lr_abt; - unsigned long spsr_abt; - - unsigned long sp_und; - unsigned long lr_und; - unsigned long spsr_und; - - unsigned long sp_irq; - unsigned long lr_irq; - unsigned long spsr_irq; - - unsigned long r8_fiq; - unsigned long r9_fiq; - unsigned long r10_fiq; - unsigned long r11_fiq; - unsigned long r12_fiq; - unsigned long sp_fiq; - unsigned long lr_fiq; - unsigned long spsr_fiq; -}; - -void __naked get_mode_regs(struct mode_regs *regs) -{ - asm volatile ( - "mrs r1, cpsr\n" - "msr cpsr_c, #0xd3 @(SVC_MODE | PSR_I_BIT | PSR_F_BIT)\n" - "stmia r0!, {r13 - r14}\n" - "mrs r2, spsr\n" - "msr cpsr_c, #0xd7 @(ABT_MODE | PSR_I_BIT | PSR_F_BIT)\n" - "stmia r0!, {r2, r13 - r14}\n" - "mrs r2, spsr\n" - "msr cpsr_c, #0xdb @(UND_MODE | PSR_I_BIT | PSR_F_BIT)\n" - "stmia r0!, {r2, r13 - r14}\n" - "mrs r2, spsr\n" - "msr cpsr_c, #0xd2 @(IRQ_MODE | PSR_I_BIT | PSR_F_BIT)\n" - "stmia r0!, {r2, r13 - r14}\n" - "mrs r2, spsr\n" - "msr cpsr_c, #0xd1 @(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)\n" - "stmia r0!, {r2, r8 - r14}\n" - "mrs r2, spsr\n" - "stmia r0!, {r2}\n" - "msr cpsr_c, r1\n" - "bx lr\n"); -} - - -static void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, - const struct pt_regs *regs) -{ - struct mode_regs mode_regs; - unsigned long mode = regs->ARM_cpsr & MODE_MASK; - - fiq_debugger_dump_regs(state, regs); - get_mode_regs(&mode_regs); - - fiq_debugger_printf(state, - "%csvc: sp %08x lr %08x spsr %08x\n", - mode == SVC_MODE ? '*' : ' ', - mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc); - fiq_debugger_printf(state, - "%cabt: sp %08x lr %08x spsr %08x\n", - mode == ABT_MODE ? '*' : ' ', - mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt); - fiq_debugger_printf(state, - "%cund: sp %08x lr %08x spsr %08x\n", - mode == UND_MODE ? '*' : ' ', - mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und); - fiq_debugger_printf(state, - "%cirq: sp %08x lr %08x spsr %08x\n", - mode == IRQ_MODE ? '*' : ' ', - mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq); - fiq_debugger_printf(state, - "%cfiq: r8 %08x r9 %08x r10 %08x r11 %08x r12 %08x\n", - mode == FIQ_MODE ? '*' : ' ', - mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq, - mode_regs.r11_fiq, mode_regs.r12_fiq); - fiq_debugger_printf(state, - " fiq: sp %08x lr %08x spsr %08x\n", - mode_regs.sp_fiq, mode_regs.lr_fiq, mode_regs.spsr_fiq); -} - static void fiq_debugger_dump_irqs(struct fiq_debugger_state *state) { int n; @@ -405,98 +282,6 @@ static void fiq_debugger_dump_irqs(struct fiq_debugger_state *state) } } -struct stacktrace_state { - struct fiq_debugger_state *state; - unsigned int depth; -}; - -static int report_trace(struct stackframe *frame, void *d) -{ - struct stacktrace_state *sts = d; - - if (sts->depth) { - fiq_debugger_printf(sts->state, - " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", - frame->pc, frame->pc, frame->lr, frame->lr, - frame->sp, frame->fp); - sts->depth--; - return 0; - } - fiq_debugger_printf(sts->state, " ...\n"); - - return sts->depth == 0; -} - -struct frame_tail { - struct frame_tail *fp; - unsigned long sp; - unsigned long lr; -} __attribute__((packed)); - -static struct frame_tail *user_backtrace(struct fiq_debugger_state *state, - struct frame_tail *tail) -{ - struct frame_tail buftail[2]; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) { - fiq_debugger_printf(state, " invalid frame pointer %p\n", - tail); - return NULL; - } - if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) { - fiq_debugger_printf(state, - " failed to copy frame pointer %p\n", tail); - return NULL; - } - - fiq_debugger_printf(state, " %p\n", buftail[0].lr); - - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (tail >= buftail[0].fp) - return NULL; - - return buftail[0].fp-1; -} - -void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, - const struct pt_regs *regs, unsigned int depth, void *ssp) -{ - struct frame_tail *tail; - struct thread_info *real_thread_info = THREAD_INFO(ssp); - struct stacktrace_state sts; - - sts.depth = depth; - sts.state = state; - *current_thread_info() = *real_thread_info; - - if (!current) - fiq_debugger_printf(state, "current NULL\n"); - else - fiq_debugger_printf(state, "pid: %d comm: %s\n", - current->pid, current->comm); - fiq_debugger_dump_regs(state, regs); - - if (!user_mode(regs)) { - struct stackframe frame; - frame.fp = regs->ARM_fp; - frame.sp = regs->ARM_sp; - frame.lr = regs->ARM_lr; - frame.pc = regs->ARM_pc; - fiq_debugger_printf(state, - " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", - regs->ARM_pc, regs->ARM_pc, regs->ARM_lr, regs->ARM_lr, - regs->ARM_sp, regs->ARM_fp); - walk_stackframe(&frame, report_trace, &sts); - return; - } - - tail = ((struct frame_tail *) regs->ARM_fp) - 1; - while (depth-- && tail && !((unsigned long) tail & 3)) - tail = user_backtrace(state, tail); -} - static void fiq_debugger_do_ps(struct fiq_debugger_state *state) { struct task_struct *g; @@ -692,9 +477,7 @@ static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state, if (!strcmp(cmd, "help") || !strcmp(cmd, "?")) { fiq_debugger_help(state); } else if (!strcmp(cmd, "pc")) { - fiq_debugger_printf(state, " pc %08x cpsr %08x mode %s\n", - regs->ARM_pc, regs->ARM_cpsr, - mode_name(regs->ARM_cpsr)); + fiq_debugger_dump_pc(state, regs); } else if (!strcmp(cmd, "regs")) { fiq_debugger_dump_regs(state, regs); } else if (!strcmp(cmd, "allregs")) { diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c new file mode 100644 index 000000000000..fca12737d224 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c @@ -0,0 +1,240 @@ +/* + * Copyright (C) 2014 Google, Inc. + * Author: Colin Cross + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +#include + +#include "fiq_debugger_priv.h" + +static char *mode_name(unsigned cpsr) +{ + switch (cpsr & MODE_MASK) { + case USR_MODE: return "USR"; + case FIQ_MODE: return "FIQ"; + case IRQ_MODE: return "IRQ"; + case SVC_MODE: return "SVC"; + case ABT_MODE: return "ABT"; + case UND_MODE: return "UND"; + case SYSTEM_MODE: return "SYS"; + default: return "???"; + } +} + +void fiq_debugger_dump_pc(struct fiq_debugger_state *state, + const struct pt_regs *regs) +{ + fiq_debugger_printf(state, " pc %08x cpsr %08x mode %s\n", + regs->ARM_pc, regs->ARM_cpsr, mode_name(regs->ARM_cpsr)); +} + +void fiq_debugger_dump_regs(struct fiq_debugger_state *state, + const struct pt_regs *regs) +{ + fiq_debugger_printf(state, + " r0 %08x r1 %08x r2 %08x r3 %08x\n", + regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); + fiq_debugger_printf(state, + " r4 %08x r5 %08x r6 %08x r7 %08x\n", + regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7); + fiq_debugger_printf(state, + " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n", + regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp, + mode_name(regs->ARM_cpsr)); + fiq_debugger_printf(state, + " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n", + regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc, + regs->ARM_cpsr); +} + +struct mode_regs { + unsigned long sp_svc; + unsigned long lr_svc; + unsigned long spsr_svc; + + unsigned long sp_abt; + unsigned long lr_abt; + unsigned long spsr_abt; + + unsigned long sp_und; + unsigned long lr_und; + unsigned long spsr_und; + + unsigned long sp_irq; + unsigned long lr_irq; + unsigned long spsr_irq; + + unsigned long r8_fiq; + unsigned long r9_fiq; + unsigned long r10_fiq; + unsigned long r11_fiq; + unsigned long r12_fiq; + unsigned long sp_fiq; + unsigned long lr_fiq; + unsigned long spsr_fiq; +}; + +static void __naked get_mode_regs(struct mode_regs *regs) +{ + asm volatile ( + "mrs r1, cpsr\n" + "msr cpsr_c, #0xd3 @(SVC_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd7 @(ABT_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xdb @(UND_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd2 @(IRQ_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r13 - r14}\n" + "mrs r2, spsr\n" + "msr cpsr_c, #0xd1 @(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)\n" + "stmia r0!, {r2, r8 - r14}\n" + "mrs r2, spsr\n" + "stmia r0!, {r2}\n" + "msr cpsr_c, r1\n" + "bx lr\n"); +} + + +void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, + const struct pt_regs *regs) +{ + struct mode_regs mode_regs; + unsigned long mode = regs->ARM_cpsr & MODE_MASK; + + fiq_debugger_dump_regs(state, regs); + get_mode_regs(&mode_regs); + + fiq_debugger_printf(state, + "%csvc: sp %08x lr %08x spsr %08x\n", + mode == SVC_MODE ? '*' : ' ', + mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc); + fiq_debugger_printf(state, + "%cabt: sp %08x lr %08x spsr %08x\n", + mode == ABT_MODE ? '*' : ' ', + mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt); + fiq_debugger_printf(state, + "%cund: sp %08x lr %08x spsr %08x\n", + mode == UND_MODE ? '*' : ' ', + mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und); + fiq_debugger_printf(state, + "%cirq: sp %08x lr %08x spsr %08x\n", + mode == IRQ_MODE ? '*' : ' ', + mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq); + fiq_debugger_printf(state, + "%cfiq: r8 %08x r9 %08x r10 %08x r11 %08x r12 %08x\n", + mode == FIQ_MODE ? '*' : ' ', + mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq, + mode_regs.r11_fiq, mode_regs.r12_fiq); + fiq_debugger_printf(state, + " fiq: sp %08x lr %08x spsr %08x\n", + mode_regs.sp_fiq, mode_regs.lr_fiq, mode_regs.spsr_fiq); +} + +struct stacktrace_state { + struct fiq_debugger_state *state; + unsigned int depth; +}; + +static int report_trace(struct stackframe *frame, void *d) +{ + struct stacktrace_state *sts = d; + + if (sts->depth) { + fiq_debugger_printf(sts->state, + " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", + frame->pc, frame->pc, frame->lr, frame->lr, + frame->sp, frame->fp); + sts->depth--; + return 0; + } + fiq_debugger_printf(sts->state, " ...\n"); + + return sts->depth == 0; +} + +struct frame_tail { + struct frame_tail *fp; + unsigned long sp; + unsigned long lr; +} __attribute__((packed)); + +static struct frame_tail *user_backtrace(struct fiq_debugger_state *state, + struct frame_tail *tail) +{ + struct frame_tail buftail[2]; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) { + fiq_debugger_printf(state, " invalid frame pointer %p\n", + tail); + return NULL; + } + if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) { + fiq_debugger_printf(state, + " failed to copy frame pointer %p\n", tail); + return NULL; + } + + fiq_debugger_printf(state, " %p\n", buftail[0].lr); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (tail >= buftail[0].fp) + return NULL; + + return buftail[0].fp-1; +} + +void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, + const struct pt_regs *regs, unsigned int depth, void *ssp) +{ + struct frame_tail *tail; + struct thread_info *real_thread_info = THREAD_INFO(ssp); + struct stacktrace_state sts; + + sts.depth = depth; + sts.state = state; + *current_thread_info() = *real_thread_info; + + if (!current) + fiq_debugger_printf(state, "current NULL\n"); + else + fiq_debugger_printf(state, "pid: %d comm: %s\n", + current->pid, current->comm); + fiq_debugger_dump_regs(state, regs); + + if (!user_mode(regs)) { + struct stackframe frame; + frame.fp = regs->ARM_fp; + frame.sp = regs->ARM_sp; + frame.lr = regs->ARM_lr; + frame.pc = regs->ARM_pc; + fiq_debugger_printf(state, + " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", + regs->ARM_pc, regs->ARM_pc, regs->ARM_lr, regs->ARM_lr, + regs->ARM_sp, regs->ARM_fp); + walk_stackframe(&frame, report_trace, &sts); + return; + } + + tail = ((struct frame_tail *) regs->ARM_fp) - 1; + while (depth-- && tail && !((unsigned long) tail & 3)) + tail = user_backtrace(state, tail); +} diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h new file mode 100644 index 000000000000..746cf1b5d55b --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2014 Google, Inc. + * Author: Colin Cross + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _FIQ_DEBUGGER_PRIV_H_ +#define _FIQ_DEBUGGER_PRIV_H_ + +#define THREAD_INFO(sp) ((struct thread_info *) \ + ((unsigned long)(sp) & ~(THREAD_SIZE - 1))) + +struct fiq_debugger_state; +struct pt_regs; + +int fiq_debugger_printf(void *cookie, const char *fmt, ...); + +void fiq_debugger_dump_pc(struct fiq_debugger_state *state, + const struct pt_regs *regs); +void fiq_debugger_dump_regs(struct fiq_debugger_state *state, + const struct pt_regs *regs); +void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, + const struct pt_regs *regs); +void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, + const struct pt_regs *regs, unsigned int depth, void *ssp); + +#endif -- GitLab From 516b14de21c145aaf8f63a267ea9bc7f8c572a53 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 2 Apr 2014 18:49:39 -0700 Subject: [PATCH 0886/1442] ANDROID: fiq_debugger: add ARM64 support Add fiq_debugger_arm64.c that implements the platform-specific functions. Change-Id: I4d8b96777bb8503a93d4eb47bbde8e018740a5bf Signed-off-by: Colin Cross --- drivers/staging/android/fiq_debugger/Kconfig | 2 +- drivers/staging/android/fiq_debugger/Makefile | 1 + .../android/fiq_debugger/fiq_debugger_arm64.c | 202 ++++++++++++++++++ 3 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c diff --git a/drivers/staging/android/fiq_debugger/Kconfig b/drivers/staging/android/fiq_debugger/Kconfig index 803fb59cc82f..d87978cc7dee 100644 --- a/drivers/staging/android/fiq_debugger/Kconfig +++ b/drivers/staging/android/fiq_debugger/Kconfig @@ -1,7 +1,7 @@ config FIQ_DEBUGGER bool "FIQ Mode Serial Debugger" default n - depends on ARM + depends on ARM || ARM64 help The FIQ serial debugger can accept commands even when the kernel is unresponsive due to being stuck with interrupts diff --git a/drivers/staging/android/fiq_debugger/Makefile b/drivers/staging/android/fiq_debugger/Makefile index 1e203aed8c62..c95da7eb1b33 100644 --- a/drivers/staging/android/fiq_debugger/Makefile +++ b/drivers/staging/android/fiq_debugger/Makefile @@ -1,2 +1,3 @@ obj-y += fiq_debugger.o obj-$(CONFIG_ARM) += fiq_debugger_arm.o +obj-$(CONFIG_ARM64) += fiq_debugger_arm64.o \ No newline at end of file diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c new file mode 100644 index 000000000000..cbca82b70533 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2014 Google, Inc. + * Author: Colin Cross + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +#include "fiq_debugger_priv.h" + +static char *mode_name(const struct pt_regs *regs) +{ + if (compat_user_mode(regs)) { + return "USR"; + } else { + switch (processor_mode(regs)) { + case PSR_MODE_EL0t: return "EL0t"; + case PSR_MODE_EL1t: return "EL1t"; + case PSR_MODE_EL1h: return "EL1h"; + case PSR_MODE_EL2t: return "EL2t"; + case PSR_MODE_EL2h: return "EL2h"; + default: return "???"; + } + } +} + +void fiq_debugger_dump_pc(struct fiq_debugger_state *state, + const struct pt_regs *regs) +{ + fiq_debugger_printf(state, " pc %016lx cpsr %08lx mode %s\n", + regs->pc, regs->pstate, mode_name(regs)); +} + +void fiq_debugger_dump_regs_aarch32(struct fiq_debugger_state *state, + const struct pt_regs *regs) +{ + fiq_debugger_printf(state, " r0 %08x r1 %08x r2 %08x r3 %08x\n", + regs->compat_usr(0), regs->compat_usr(1), + regs->compat_usr(2), regs->compat_usr(3)); + fiq_debugger_printf(state, " r4 %08x r5 %08x r6 %08x r7 %08x\n", + regs->compat_usr(4), regs->compat_usr(5), + regs->compat_usr(6), regs->compat_usr(7)); + fiq_debugger_printf(state, " r8 %08x r9 %08x r10 %08x r11 %08x\n", + regs->compat_usr(8), regs->compat_usr(9), + regs->compat_usr(10), regs->compat_usr(11)); + fiq_debugger_printf(state, " ip %08x sp %08x lr %08x pc %08x\n", + regs->compat_usr(12), regs->compat_sp, + regs->compat_lr, regs->pc); + fiq_debugger_printf(state, " cpsr %08x (%s)\n", + regs->pstate, mode_name(regs)); +} + +void fiq_debugger_dump_regs_aarch64(struct fiq_debugger_state *state, + const struct pt_regs *regs) +{ + + fiq_debugger_printf(state, " x0 %016lx x1 %016lx\n", + regs->regs[0], regs->regs[1]); + fiq_debugger_printf(state, " x2 %016lx x3 %016lx\n", + regs->regs[2], regs->regs[3]); + fiq_debugger_printf(state, " x4 %016lx x5 %016lx\n", + regs->regs[4], regs->regs[5]); + fiq_debugger_printf(state, " x6 %016lx x7 %016lx\n", + regs->regs[6], regs->regs[7]); + fiq_debugger_printf(state, " x8 %016lx x9 %016lx\n", + regs->regs[8], regs->regs[9]); + fiq_debugger_printf(state, " x10 %016lx x11 %016lx\n", + regs->regs[10], regs->regs[11]); + fiq_debugger_printf(state, " x12 %016lx x13 %016lx\n", + regs->regs[12], regs->regs[13]); + fiq_debugger_printf(state, " x14 %016lx x15 %016lx\n", + regs->regs[14], regs->regs[15]); + fiq_debugger_printf(state, " x16 %016lx x17 %016lx\n", + regs->regs[16], regs->regs[17]); + fiq_debugger_printf(state, " x18 %016lx x19 %016lx\n", + regs->regs[18], regs->regs[19]); + fiq_debugger_printf(state, " x20 %016lx x21 %016lx\n", + regs->regs[20], regs->regs[21]); + fiq_debugger_printf(state, " x22 %016lx x23 %016lx\n", + regs->regs[22], regs->regs[23]); + fiq_debugger_printf(state, " x24 %016lx x25 %016lx\n", + regs->regs[24], regs->regs[25]); + fiq_debugger_printf(state, " x26 %016lx x27 %016lx\n", + regs->regs[26], regs->regs[27]); + fiq_debugger_printf(state, " x28 %016lx x29 %016lx\n", + regs->regs[28], regs->regs[29]); + fiq_debugger_printf(state, " x30 %016lx sp %016lx\n", + regs->regs[30], regs->sp); + fiq_debugger_printf(state, " pc %016lx cpsr %08x (%s)\n", + regs->pc, regs->pstate, mode_name(regs)); +} + +void fiq_debugger_dump_regs(struct fiq_debugger_state *state, + const struct pt_regs *regs) +{ + if (compat_user_mode(regs)) + fiq_debugger_dump_regs_aarch32(state, regs); + else + fiq_debugger_dump_regs_aarch64(state, regs); +} + +#define READ_SPECIAL_REG(x) ({ \ + u64 val; \ + asm volatile ("mrs %0, " # x : "=r"(val)); \ + val; \ +}) + +void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, + const struct pt_regs *regs) +{ + u32 pstate = READ_SPECIAL_REG(CurrentEl); + bool in_el2 = (pstate & PSR_MODE_MASK) >= PSR_MODE_EL2t; + + fiq_debugger_dump_regs(state, regs); + + fiq_debugger_printf(state, " sp_el0 %016lx\n", + READ_SPECIAL_REG(sp_el0)); + + if (in_el2) + fiq_debugger_printf(state, " sp_el1 %016lx\n", + READ_SPECIAL_REG(sp_el1)); + + fiq_debugger_printf(state, " elr_el1 %016lx\n", + READ_SPECIAL_REG(elr_el1)); + + fiq_debugger_printf(state, " spsr_el1 %08lx\n", + READ_SPECIAL_REG(spsr_el1)); + + if (in_el2) { + fiq_debugger_printf(state, " spsr_irq %08lx\n", + READ_SPECIAL_REG(spsr_irq)); + fiq_debugger_printf(state, " spsr_abt %08lx\n", + READ_SPECIAL_REG(spsr_abt)); + fiq_debugger_printf(state, " spsr_und %08lx\n", + READ_SPECIAL_REG(spsr_und)); + fiq_debugger_printf(state, " spsr_fiq %08lx\n", + READ_SPECIAL_REG(spsr_fiq)); + fiq_debugger_printf(state, " spsr_el2 %08lx\n", + READ_SPECIAL_REG(elr_el2)); + fiq_debugger_printf(state, " spsr_el2 %08lx\n", + READ_SPECIAL_REG(spsr_el2)); + } +} + +struct stacktrace_state { + struct fiq_debugger_state *state; + unsigned int depth; +}; + +static int report_trace(struct stackframe *frame, void *d) +{ + struct stacktrace_state *sts = d; + + if (sts->depth) { + fiq_debugger_printf(sts->state, "%pF:\n", frame->pc); + fiq_debugger_printf(sts->state, + " pc %016lx sp %016lx fp %016lx\n", + frame->pc, frame->sp, frame->fp); + sts->depth--; + return 0; + } + fiq_debugger_printf(sts->state, " ...\n"); + + return sts->depth == 0; +} + +void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, + const struct pt_regs *regs, unsigned int depth, void *ssp) +{ + struct thread_info *real_thread_info = THREAD_INFO(ssp); + struct stacktrace_state sts; + + sts.depth = depth; + sts.state = state; + *current_thread_info() = *real_thread_info; + + if (!current) + fiq_debugger_printf(state, "current NULL\n"); + else + fiq_debugger_printf(state, "pid: %d comm: %s\n", + current->pid, current->comm); + fiq_debugger_dump_regs(state, regs); + + if (!user_mode(regs)) { + struct stackframe frame; + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; + fiq_debugger_printf(state, "\n"); + walk_stackframe(&frame, report_trace, &sts); + } +} -- GitLab From e073df688689fb6f09f15a8d11bd83f3b5138dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Fri, 2 May 2014 19:52:54 -0700 Subject: [PATCH 0887/1442] ANDROID: fiq_debugger: Call fiq_debugger_printf through a function pointer from cpu specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the output from the register and stack trace code to be sent elsewhere. Change-Id: I41bb0d5a25e1b9ca55feef5dbd675818b2f832d5 Signed-off-by: Arve Hjønnevåg --- .../android/fiq_debugger/fiq_debugger.c | 67 ++++++------ .../android/fiq_debugger/fiq_debugger_arm.c | 58 +++++----- .../android/fiq_debugger/fiq_debugger_arm64.c | 102 +++++++++--------- .../android/fiq_debugger/fiq_debugger_priv.h | 15 +-- 4 files changed, 123 insertions(+), 119 deletions(-) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index 5516d3113b94..7d6b4ae8a2cd 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -54,6 +54,7 @@ struct fiq_debugger_state { #ifdef CONFIG_FIQ_GLUE struct fiq_glue_handler handler; #endif + struct fiq_debugger_output output; int fiq; int uart_irq; @@ -229,18 +230,19 @@ static void fiq_debugger_dump_kernel_log(struct fiq_debugger_state *state) } } -int fiq_debugger_printf(void *cookie, const char *fmt, ...) +static void fiq_debugger_printf(struct fiq_debugger_output *output, + const char *fmt, ...) { - struct fiq_debugger_state *state = cookie; + struct fiq_debugger_state *state; char buf[256]; va_list ap; + state = container_of(output, struct fiq_debugger_state, output); va_start(ap, fmt); vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); fiq_debugger_puts(state, buf); - return state->debug_abort; } /* Safe outside fiq context */ @@ -267,13 +269,13 @@ static void fiq_debugger_dump_irqs(struct fiq_debugger_state *state) int n; struct irq_desc *desc; - fiq_debugger_printf(state, + fiq_debugger_printf(&state->output, "irqnr total since-last status name\n"); for_each_irq_desc(n, desc) { struct irqaction *act = desc->action; if (!act && !kstat_irqs(n)) continue; - fiq_debugger_printf(state, "%5d: %10u %11u %8x %s\n", n, + fiq_debugger_printf(&state->output, "%5d: %10u %11u %8x %s\n", n, kstat_irqs(n), kstat_irqs(n) - state->last_irqs[n], desc->status_use_accessors, @@ -289,18 +291,18 @@ static void fiq_debugger_do_ps(struct fiq_debugger_state *state) unsigned task_state; static const char stat_nam[] = "RSDTtZX"; - fiq_debugger_printf(state, "pid ppid prio task pc\n"); + fiq_debugger_printf(&state->output, "pid ppid prio task pc\n"); read_lock(&tasklist_lock); do_each_thread(g, p) { task_state = p->state ? __ffs(p->state) + 1 : 0; - fiq_debugger_printf(state, + fiq_debugger_printf(&state->output, "%5d %5d %4d ", p->pid, p->parent->pid, p->prio); - fiq_debugger_printf(state, "%-13.13s %c", p->comm, + fiq_debugger_printf(&state->output, "%-13.13s %c", p->comm, task_state >= sizeof(stat_nam) ? '?' : stat_nam[task_state]); if (task_state == TASK_RUNNING) - fiq_debugger_printf(state, " running\n"); + fiq_debugger_printf(&state->output, " running\n"); else - fiq_debugger_printf(state, " %08lx\n", + fiq_debugger_printf(&state->output, " %08lx\n", thread_saved_pc(p)); } while_each_thread(g, p); read_unlock(&tasklist_lock); @@ -332,7 +334,7 @@ static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state) static void fiq_debugger_do_sysrq(struct fiq_debugger_state *state, char rq) { if ((rq == 'g' || rq == 'G') && !fiq_kgdb_enable) { - fiq_debugger_printf(state, "sysrq-g blocked\n"); + fiq_debugger_printf(&state->output, "sysrq-g blocked\n"); return; } fiq_debugger_begin_syslog_dump(state); @@ -344,11 +346,11 @@ static void fiq_debugger_do_sysrq(struct fiq_debugger_state *state, char rq) static void fiq_debugger_do_kgdb(struct fiq_debugger_state *state) { if (!fiq_kgdb_enable) { - fiq_debugger_printf(state, "kgdb through fiq debugger not enabled\n"); + fiq_debugger_printf(&state->output, "kgdb through fiq debugger not enabled\n"); return; } - fiq_debugger_printf(state, "enabling console and triggering kgdb\n"); + fiq_debugger_printf(&state->output, "enabling console and triggering kgdb\n"); state->console_enable = true; handle_sysrq('g'); } @@ -361,7 +363,7 @@ static void fiq_debugger_schedule_work(struct fiq_debugger_state *state, spin_lock_irqsave(&state->work_lock, flags); if (state->work_cmd[0] != '\0') { - fiq_debugger_printf(state, "work command processor busy\n"); + fiq_debugger_printf(&state->output, "work command processor busy\n"); spin_unlock_irqrestore(&state->work_lock, flags); return; } @@ -398,7 +400,7 @@ static void fiq_debugger_work(struct work_struct *work) else kernel_restart(NULL); } else { - fiq_debugger_printf(state, "unknown work command '%s'\n", + fiq_debugger_printf(&state->output, "unknown work command '%s'\n", work_cmd); } } @@ -422,7 +424,7 @@ static void fiq_debugger_irq_exec(struct fiq_debugger_state *state, char *cmd) static void fiq_debugger_help(struct fiq_debugger_state *state) { - fiq_debugger_printf(state, + fiq_debugger_printf(&state->output, "FIQ Debugger commands:\n" " pc PC status\n" " regs Register dump\n" @@ -433,18 +435,18 @@ static void fiq_debugger_help(struct fiq_debugger_state *state) " irqs Interupt status\n" " kmsg Kernel log\n" " version Kernel version\n"); - fiq_debugger_printf(state, + fiq_debugger_printf(&state->output, " sleep Allow sleep while in FIQ\n" " nosleep Disable sleep while in FIQ\n" " console Switch terminal to console\n" " cpu Current CPU\n" " cpu Switch to CPU\n"); - fiq_debugger_printf(state, + fiq_debugger_printf(&state->output, " ps Process list\n" " sysrq sysrq options\n" " sysrq Execute sysrq with \n"); #ifdef CONFIG_KGDB - fiq_debugger_printf(state, + fiq_debugger_printf(&state->output, " kgdb Enter kernel debugger\n"); #endif } @@ -477,13 +479,13 @@ static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state, if (!strcmp(cmd, "help") || !strcmp(cmd, "?")) { fiq_debugger_help(state); } else if (!strcmp(cmd, "pc")) { - fiq_debugger_dump_pc(state, regs); + fiq_debugger_dump_pc(&state->output, regs); } else if (!strcmp(cmd, "regs")) { - fiq_debugger_dump_regs(state, regs); + fiq_debugger_dump_regs(&state->output, regs); } else if (!strcmp(cmd, "allregs")) { - fiq_debugger_dump_allregs(state, regs); + fiq_debugger_dump_allregs(&state->output, regs); } else if (!strcmp(cmd, "bt")) { - fiq_debugger_dump_stacktrace(state, regs, 100, svc_sp); + fiq_debugger_dump_stacktrace(&state->output, regs, 100, svc_sp); } else if (!strncmp(cmd, "reset", 5)) { cmd += 5; while (*cmd == ' ') @@ -500,29 +502,29 @@ static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state, } else if (!strcmp(cmd, "kmsg")) { fiq_debugger_dump_kernel_log(state); } else if (!strcmp(cmd, "version")) { - fiq_debugger_printf(state, "%s\n", linux_banner); + fiq_debugger_printf(&state->output, "%s\n", linux_banner); } else if (!strcmp(cmd, "sleep")) { state->no_sleep = false; - fiq_debugger_printf(state, "enabling sleep\n"); + fiq_debugger_printf(&state->output, "enabling sleep\n"); } else if (!strcmp(cmd, "nosleep")) { state->no_sleep = true; - fiq_debugger_printf(state, "disabling sleep\n"); + fiq_debugger_printf(&state->output, "disabling sleep\n"); } else if (!strcmp(cmd, "console")) { - fiq_debugger_printf(state, "console mode\n"); + fiq_debugger_printf(&state->output, "console mode\n"); fiq_debugger_uart_flush(state); state->console_enable = true; } else if (!strcmp(cmd, "cpu")) { - fiq_debugger_printf(state, "cpu %d\n", state->current_cpu); + fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu); } else if (!strncmp(cmd, "cpu ", 4)) { unsigned long cpu = 0; if (strict_strtoul(cmd + 4, 10, &cpu) == 0) fiq_debugger_switch_cpu(state, cpu); else - fiq_debugger_printf(state, "invalid cpu\n"); - fiq_debugger_printf(state, "cpu %d\n", state->current_cpu); + fiq_debugger_printf(&state->output, "invalid cpu\n"); + fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu); } else { if (state->debug_busy) { - fiq_debugger_printf(state, + fiq_debugger_printf(&state->output, "command processor busy. trying to abort.\n"); state->debug_abort = -1; } else { @@ -645,7 +647,7 @@ static bool fiq_debugger_handle_uart_interrupt(struct fiq_debugger_state *state, MAX_UNHANDLED_FIQ_COUNT) return false; - fiq_debugger_printf(state, + fiq_debugger_printf(&state->output, "fiq_debugger: cpu %d not responding, " "reverting to cpu %d\n", state->current_cpu, this_cpu); @@ -1053,6 +1055,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) return -EINVAL; state = kzalloc(sizeof(*state), GFP_KERNEL); + state->output.printf = fiq_debugger_printf; setup_timer(&state->sleep_timer, fiq_debugger_sleep_timer_expired, (unsigned long)state); state->pdata = pdata; diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c index fca12737d224..8b3e0137be1a 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c @@ -34,27 +34,27 @@ static char *mode_name(unsigned cpsr) } } -void fiq_debugger_dump_pc(struct fiq_debugger_state *state, +void fiq_debugger_dump_pc(struct fiq_debugger_output *output, const struct pt_regs *regs) { - fiq_debugger_printf(state, " pc %08x cpsr %08x mode %s\n", + output->printf(output, " pc %08x cpsr %08x mode %s\n", regs->ARM_pc, regs->ARM_cpsr, mode_name(regs->ARM_cpsr)); } -void fiq_debugger_dump_regs(struct fiq_debugger_state *state, +void fiq_debugger_dump_regs(struct fiq_debugger_output *output, const struct pt_regs *regs) { - fiq_debugger_printf(state, + output->printf(output, " r0 %08x r1 %08x r2 %08x r3 %08x\n", regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); - fiq_debugger_printf(state, + output->printf(output, " r4 %08x r5 %08x r6 %08x r7 %08x\n", regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7); - fiq_debugger_printf(state, + output->printf(output, " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n", regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp, mode_name(regs->ARM_cpsr)); - fiq_debugger_printf(state, + output->printf(output, " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n", regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc, regs->ARM_cpsr); @@ -112,43 +112,43 @@ static void __naked get_mode_regs(struct mode_regs *regs) } -void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, +void fiq_debugger_dump_allregs(struct fiq_debugger_output *output, const struct pt_regs *regs) { struct mode_regs mode_regs; unsigned long mode = regs->ARM_cpsr & MODE_MASK; - fiq_debugger_dump_regs(state, regs); + fiq_debugger_dump_regs(output, regs); get_mode_regs(&mode_regs); - fiq_debugger_printf(state, + output->printf(output, "%csvc: sp %08x lr %08x spsr %08x\n", mode == SVC_MODE ? '*' : ' ', mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc); - fiq_debugger_printf(state, + output->printf(output, "%cabt: sp %08x lr %08x spsr %08x\n", mode == ABT_MODE ? '*' : ' ', mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt); - fiq_debugger_printf(state, + output->printf(output, "%cund: sp %08x lr %08x spsr %08x\n", mode == UND_MODE ? '*' : ' ', mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und); - fiq_debugger_printf(state, + output->printf(output, "%cirq: sp %08x lr %08x spsr %08x\n", mode == IRQ_MODE ? '*' : ' ', mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq); - fiq_debugger_printf(state, + output->printf(output, "%cfiq: r8 %08x r9 %08x r10 %08x r11 %08x r12 %08x\n", mode == FIQ_MODE ? '*' : ' ', mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq, mode_regs.r11_fiq, mode_regs.r12_fiq); - fiq_debugger_printf(state, + output->printf(output, " fiq: sp %08x lr %08x spsr %08x\n", mode_regs.sp_fiq, mode_regs.lr_fiq, mode_regs.spsr_fiq); } struct stacktrace_state { - struct fiq_debugger_state *state; + struct fiq_debugger_output *output; unsigned int depth; }; @@ -157,14 +157,14 @@ static int report_trace(struct stackframe *frame, void *d) struct stacktrace_state *sts = d; if (sts->depth) { - fiq_debugger_printf(sts->state, + sts->output->printf(sts->output, " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", frame->pc, frame->pc, frame->lr, frame->lr, frame->sp, frame->fp); sts->depth--; return 0; } - fiq_debugger_printf(sts->state, " ...\n"); + sts->output->printf(sts->output, " ...\n"); return sts->depth == 0; } @@ -175,24 +175,24 @@ struct frame_tail { unsigned long lr; } __attribute__((packed)); -static struct frame_tail *user_backtrace(struct fiq_debugger_state *state, +static struct frame_tail *user_backtrace(struct fiq_debugger_output *output, struct frame_tail *tail) { struct frame_tail buftail[2]; /* Also check accessibility of one struct frame_tail beyond */ if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) { - fiq_debugger_printf(state, " invalid frame pointer %p\n", + output->printf(output, " invalid frame pointer %p\n", tail); return NULL; } if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) { - fiq_debugger_printf(state, + output->printf(output, " failed to copy frame pointer %p\n", tail); return NULL; } - fiq_debugger_printf(state, " %p\n", buftail[0].lr); + output->printf(output, " %p\n", buftail[0].lr); /* frame pointers should strictly progress back up the stack * (towards higher addresses) */ @@ -202,7 +202,7 @@ static struct frame_tail *user_backtrace(struct fiq_debugger_state *state, return buftail[0].fp-1; } -void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, +void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output, const struct pt_regs *regs, unsigned int depth, void *ssp) { struct frame_tail *tail; @@ -210,15 +210,15 @@ void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, struct stacktrace_state sts; sts.depth = depth; - sts.state = state; + sts.output = output; *current_thread_info() = *real_thread_info; if (!current) - fiq_debugger_printf(state, "current NULL\n"); + output->printf(output, "current NULL\n"); else - fiq_debugger_printf(state, "pid: %d comm: %s\n", + output->printf(output, "pid: %d comm: %s\n", current->pid, current->comm); - fiq_debugger_dump_regs(state, regs); + fiq_debugger_dump_regs(output, regs); if (!user_mode(regs)) { struct stackframe frame; @@ -226,7 +226,7 @@ void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, frame.sp = regs->ARM_sp; frame.lr = regs->ARM_lr; frame.pc = regs->ARM_pc; - fiq_debugger_printf(state, + output->printf(output, " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n", regs->ARM_pc, regs->ARM_pc, regs->ARM_lr, regs->ARM_lr, regs->ARM_sp, regs->ARM_fp); @@ -236,5 +236,5 @@ void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, tail = ((struct frame_tail *) regs->ARM_fp) - 1; while (depth-- && tail && !((unsigned long) tail & 3)) - tail = user_backtrace(state, tail); + tail = user_backtrace(output, tail); } diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c index cbca82b70533..99c6584fcfa5 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c @@ -34,79 +34,79 @@ static char *mode_name(const struct pt_regs *regs) } } -void fiq_debugger_dump_pc(struct fiq_debugger_state *state, +void fiq_debugger_dump_pc(struct fiq_debugger_output *output, const struct pt_regs *regs) { - fiq_debugger_printf(state, " pc %016lx cpsr %08lx mode %s\n", + output->printf(output, " pc %016lx cpsr %08lx mode %s\n", regs->pc, regs->pstate, mode_name(regs)); } -void fiq_debugger_dump_regs_aarch32(struct fiq_debugger_state *state, +void fiq_debugger_dump_regs_aarch32(struct fiq_debugger_output *output, const struct pt_regs *regs) { - fiq_debugger_printf(state, " r0 %08x r1 %08x r2 %08x r3 %08x\n", + output->printf(output, " r0 %08x r1 %08x r2 %08x r3 %08x\n", regs->compat_usr(0), regs->compat_usr(1), regs->compat_usr(2), regs->compat_usr(3)); - fiq_debugger_printf(state, " r4 %08x r5 %08x r6 %08x r7 %08x\n", + output->printf(output, " r4 %08x r5 %08x r6 %08x r7 %08x\n", regs->compat_usr(4), regs->compat_usr(5), regs->compat_usr(6), regs->compat_usr(7)); - fiq_debugger_printf(state, " r8 %08x r9 %08x r10 %08x r11 %08x\n", + output->printf(output, " r8 %08x r9 %08x r10 %08x r11 %08x\n", regs->compat_usr(8), regs->compat_usr(9), regs->compat_usr(10), regs->compat_usr(11)); - fiq_debugger_printf(state, " ip %08x sp %08x lr %08x pc %08x\n", + output->printf(output, " ip %08x sp %08x lr %08x pc %08x\n", regs->compat_usr(12), regs->compat_sp, regs->compat_lr, regs->pc); - fiq_debugger_printf(state, " cpsr %08x (%s)\n", + output->printf(output, " cpsr %08x (%s)\n", regs->pstate, mode_name(regs)); } -void fiq_debugger_dump_regs_aarch64(struct fiq_debugger_state *state, +void fiq_debugger_dump_regs_aarch64(struct fiq_debugger_output *output, const struct pt_regs *regs) { - fiq_debugger_printf(state, " x0 %016lx x1 %016lx\n", + output->printf(output, " x0 %016lx x1 %016lx\n", regs->regs[0], regs->regs[1]); - fiq_debugger_printf(state, " x2 %016lx x3 %016lx\n", + output->printf(output, " x2 %016lx x3 %016lx\n", regs->regs[2], regs->regs[3]); - fiq_debugger_printf(state, " x4 %016lx x5 %016lx\n", + output->printf(output, " x4 %016lx x5 %016lx\n", regs->regs[4], regs->regs[5]); - fiq_debugger_printf(state, " x6 %016lx x7 %016lx\n", + output->printf(output, " x6 %016lx x7 %016lx\n", regs->regs[6], regs->regs[7]); - fiq_debugger_printf(state, " x8 %016lx x9 %016lx\n", + output->printf(output, " x8 %016lx x9 %016lx\n", regs->regs[8], regs->regs[9]); - fiq_debugger_printf(state, " x10 %016lx x11 %016lx\n", + output->printf(output, " x10 %016lx x11 %016lx\n", regs->regs[10], regs->regs[11]); - fiq_debugger_printf(state, " x12 %016lx x13 %016lx\n", + output->printf(output, " x12 %016lx x13 %016lx\n", regs->regs[12], regs->regs[13]); - fiq_debugger_printf(state, " x14 %016lx x15 %016lx\n", + output->printf(output, " x14 %016lx x15 %016lx\n", regs->regs[14], regs->regs[15]); - fiq_debugger_printf(state, " x16 %016lx x17 %016lx\n", + output->printf(output, " x16 %016lx x17 %016lx\n", regs->regs[16], regs->regs[17]); - fiq_debugger_printf(state, " x18 %016lx x19 %016lx\n", + output->printf(output, " x18 %016lx x19 %016lx\n", regs->regs[18], regs->regs[19]); - fiq_debugger_printf(state, " x20 %016lx x21 %016lx\n", + output->printf(output, " x20 %016lx x21 %016lx\n", regs->regs[20], regs->regs[21]); - fiq_debugger_printf(state, " x22 %016lx x23 %016lx\n", + output->printf(output, " x22 %016lx x23 %016lx\n", regs->regs[22], regs->regs[23]); - fiq_debugger_printf(state, " x24 %016lx x25 %016lx\n", + output->printf(output, " x24 %016lx x25 %016lx\n", regs->regs[24], regs->regs[25]); - fiq_debugger_printf(state, " x26 %016lx x27 %016lx\n", + output->printf(output, " x26 %016lx x27 %016lx\n", regs->regs[26], regs->regs[27]); - fiq_debugger_printf(state, " x28 %016lx x29 %016lx\n", + output->printf(output, " x28 %016lx x29 %016lx\n", regs->regs[28], regs->regs[29]); - fiq_debugger_printf(state, " x30 %016lx sp %016lx\n", + output->printf(output, " x30 %016lx sp %016lx\n", regs->regs[30], regs->sp); - fiq_debugger_printf(state, " pc %016lx cpsr %08x (%s)\n", + output->printf(output, " pc %016lx cpsr %08x (%s)\n", regs->pc, regs->pstate, mode_name(regs)); } -void fiq_debugger_dump_regs(struct fiq_debugger_state *state, +void fiq_debugger_dump_regs(struct fiq_debugger_output *output, const struct pt_regs *regs) { if (compat_user_mode(regs)) - fiq_debugger_dump_regs_aarch32(state, regs); + fiq_debugger_dump_regs_aarch32(output, regs); else - fiq_debugger_dump_regs_aarch64(state, regs); + fiq_debugger_dump_regs_aarch64(output, regs); } #define READ_SPECIAL_REG(x) ({ \ @@ -115,45 +115,45 @@ void fiq_debugger_dump_regs(struct fiq_debugger_state *state, val; \ }) -void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, +void fiq_debugger_dump_allregs(struct fiq_debugger_output *output, const struct pt_regs *regs) { u32 pstate = READ_SPECIAL_REG(CurrentEl); bool in_el2 = (pstate & PSR_MODE_MASK) >= PSR_MODE_EL2t; - fiq_debugger_dump_regs(state, regs); + fiq_debugger_dump_regs(output, regs); - fiq_debugger_printf(state, " sp_el0 %016lx\n", + output->printf(output, " sp_el0 %016lx\n", READ_SPECIAL_REG(sp_el0)); if (in_el2) - fiq_debugger_printf(state, " sp_el1 %016lx\n", + output->printf(output, " sp_el1 %016lx\n", READ_SPECIAL_REG(sp_el1)); - fiq_debugger_printf(state, " elr_el1 %016lx\n", + output->printf(output, " elr_el1 %016lx\n", READ_SPECIAL_REG(elr_el1)); - fiq_debugger_printf(state, " spsr_el1 %08lx\n", + output->printf(output, " spsr_el1 %08lx\n", READ_SPECIAL_REG(spsr_el1)); if (in_el2) { - fiq_debugger_printf(state, " spsr_irq %08lx\n", + output->printf(output, " spsr_irq %08lx\n", READ_SPECIAL_REG(spsr_irq)); - fiq_debugger_printf(state, " spsr_abt %08lx\n", + output->printf(output, " spsr_abt %08lx\n", READ_SPECIAL_REG(spsr_abt)); - fiq_debugger_printf(state, " spsr_und %08lx\n", + output->printf(output, " spsr_und %08lx\n", READ_SPECIAL_REG(spsr_und)); - fiq_debugger_printf(state, " spsr_fiq %08lx\n", + output->printf(output, " spsr_fiq %08lx\n", READ_SPECIAL_REG(spsr_fiq)); - fiq_debugger_printf(state, " spsr_el2 %08lx\n", + output->printf(output, " spsr_el2 %08lx\n", READ_SPECIAL_REG(elr_el2)); - fiq_debugger_printf(state, " spsr_el2 %08lx\n", + output->printf(output, " spsr_el2 %08lx\n", READ_SPECIAL_REG(spsr_el2)); } } struct stacktrace_state { - struct fiq_debugger_state *state; + struct fiq_debugger_output *output; unsigned int depth; }; @@ -162,41 +162,41 @@ static int report_trace(struct stackframe *frame, void *d) struct stacktrace_state *sts = d; if (sts->depth) { - fiq_debugger_printf(sts->state, "%pF:\n", frame->pc); - fiq_debugger_printf(sts->state, + sts->output->printf(sts->output, "%pF:\n", frame->pc); + sts->output->printf(sts->output, " pc %016lx sp %016lx fp %016lx\n", frame->pc, frame->sp, frame->fp); sts->depth--; return 0; } - fiq_debugger_printf(sts->state, " ...\n"); + sts->output->printf(sts->output, " ...\n"); return sts->depth == 0; } -void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, +void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output, const struct pt_regs *regs, unsigned int depth, void *ssp) { struct thread_info *real_thread_info = THREAD_INFO(ssp); struct stacktrace_state sts; sts.depth = depth; - sts.state = state; + sts.output = output; *current_thread_info() = *real_thread_info; if (!current) - fiq_debugger_printf(state, "current NULL\n"); + output->printf(output, "current NULL\n"); else - fiq_debugger_printf(state, "pid: %d comm: %s\n", + output->printf(output, "pid: %d comm: %s\n", current->pid, current->comm); - fiq_debugger_dump_regs(state, regs); + fiq_debugger_dump_regs(output, regs); if (!user_mode(regs)) { struct stackframe frame; frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; - fiq_debugger_printf(state, "\n"); + output->printf(output, "\n"); walk_stackframe(&frame, report_trace, &sts); } } diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h index 746cf1b5d55b..d5d051f727a8 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h @@ -19,18 +19,19 @@ #define THREAD_INFO(sp) ((struct thread_info *) \ ((unsigned long)(sp) & ~(THREAD_SIZE - 1))) -struct fiq_debugger_state; -struct pt_regs; +struct fiq_debugger_output { + void (*printf)(struct fiq_debugger_output *output, const char *fmt, ...); +}; -int fiq_debugger_printf(void *cookie, const char *fmt, ...); +struct pt_regs; -void fiq_debugger_dump_pc(struct fiq_debugger_state *state, +void fiq_debugger_dump_pc(struct fiq_debugger_output *output, const struct pt_regs *regs); -void fiq_debugger_dump_regs(struct fiq_debugger_state *state, +void fiq_debugger_dump_regs(struct fiq_debugger_output *output, const struct pt_regs *regs); -void fiq_debugger_dump_allregs(struct fiq_debugger_state *state, +void fiq_debugger_dump_allregs(struct fiq_debugger_output *output, const struct pt_regs *regs); -void fiq_debugger_dump_stacktrace(struct fiq_debugger_state *state, +void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output, const struct pt_regs *regs, unsigned int depth, void *ssp); #endif -- GitLab From 60c384b6122aab12158ad8a36d692e313f29eda3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Fri, 2 May 2014 20:31:07 -0700 Subject: [PATCH 0888/1442] ANDROID: fiq_debugger: Add fiq_watchdog_triggered api MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dumps registers and stacktrace into console-ramoops when called from a watchdog fiq. Change-Id: Ib6fab5a52f670db18e64214d5e4890e8292a749c Signed-off-by: Arve Hjønnevåg --- drivers/staging/android/fiq_debugger/Kconfig | 6 ++ drivers/staging/android/fiq_debugger/Makefile | 3 +- .../android/fiq_debugger/fiq_watchdog.c | 56 +++++++++++++++++++ .../android/fiq_debugger/fiq_watchdog.h | 20 +++++++ 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 drivers/staging/android/fiq_debugger/fiq_watchdog.c create mode 100644 drivers/staging/android/fiq_debugger/fiq_watchdog.h diff --git a/drivers/staging/android/fiq_debugger/Kconfig b/drivers/staging/android/fiq_debugger/Kconfig index d87978cc7dee..56f7f999377e 100644 --- a/drivers/staging/android/fiq_debugger/Kconfig +++ b/drivers/staging/android/fiq_debugger/Kconfig @@ -41,3 +41,9 @@ config FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE help If enabled, this puts the fiq debugger into console mode by default. Otherwise, the fiq debugger will start out in debug mode. + +config FIQ_WATCHDOG + bool + select FIQ_DEBUGGER + select PSTORE_RAM + default n diff --git a/drivers/staging/android/fiq_debugger/Makefile b/drivers/staging/android/fiq_debugger/Makefile index c95da7eb1b33..a7ca4871cad3 100644 --- a/drivers/staging/android/fiq_debugger/Makefile +++ b/drivers/staging/android/fiq_debugger/Makefile @@ -1,3 +1,4 @@ obj-y += fiq_debugger.o obj-$(CONFIG_ARM) += fiq_debugger_arm.o -obj-$(CONFIG_ARM64) += fiq_debugger_arm64.o \ No newline at end of file +obj-$(CONFIG_ARM64) += fiq_debugger_arm64.o +obj-$(CONFIG_FIQ_WATCHDOG) += fiq_watchdog.o diff --git a/drivers/staging/android/fiq_debugger/fiq_watchdog.c b/drivers/staging/android/fiq_debugger/fiq_watchdog.c new file mode 100644 index 000000000000..194b54138417 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_watchdog.c @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include + +#include "fiq_watchdog.h" +#include "fiq_debugger_priv.h" + +static DEFINE_RAW_SPINLOCK(fiq_watchdog_lock); + +static void fiq_watchdog_printf(struct fiq_debugger_output *output, + const char *fmt, ...) +{ + char buf[256]; + va_list ap; + int len; + + va_start(ap, fmt); + len = vscnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + ramoops_console_write_buf(buf, len); +} + +struct fiq_debugger_output fiq_watchdog_output = { + .printf = fiq_watchdog_printf, +}; + +void fiq_watchdog_triggered(const struct pt_regs *regs, void *svc_sp) +{ + char msg[24]; + int len; + + raw_spin_lock(&fiq_watchdog_lock); + + len = scnprintf(msg, sizeof(msg), "watchdog fiq cpu %d\n", + THREAD_INFO(svc_sp)->cpu); + ramoops_console_write_buf(msg, len); + + fiq_debugger_dump_stacktrace(&fiq_watchdog_output, regs, 100, svc_sp); + + raw_spin_unlock(&fiq_watchdog_lock); +} diff --git a/drivers/staging/android/fiq_debugger/fiq_watchdog.h b/drivers/staging/android/fiq_debugger/fiq_watchdog.h new file mode 100644 index 000000000000..c6b507f8d976 --- /dev/null +++ b/drivers/staging/android/fiq_debugger/fiq_watchdog.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _FIQ_WATCHDOG_H_ +#define _FIQ_WATCHDOG_H_ + +void fiq_watchdog_triggered(const struct pt_regs *regs, void *svc_sp); + +#endif -- GitLab From daeea72326db6572c9fdb4f68b0330e8b8d61b71 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 4 Dec 2015 11:32:21 -0800 Subject: [PATCH 0889/1442] ANDROID: fiq_debugger: Build fixes for 4.1 Small build updates for 4.1 Signed-off-by: John Stultz --- drivers/staging/android/fiq_debugger/fiq_debugger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index 7d6b4ae8a2cd..7f056831dbff 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -517,7 +517,7 @@ static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state, fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu); } else if (!strncmp(cmd, "cpu ", 4)) { unsigned long cpu = 0; - if (strict_strtoul(cmd + 4, 10, &cpu) == 0) + if (kstrtoul(cmd + 4, 10, &cpu) == 0) fiq_debugger_switch_cpu(state, cpu); else fiq_debugger_printf(&state->output, "invalid cpu\n"); @@ -1144,7 +1144,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) if (state->wakeup_irq >= 0) { ret = request_irq(state->wakeup_irq, fiq_debugger_wakeup_irq_handler, - IRQF_TRIGGER_FALLING | IRQF_DISABLED, + IRQF_TRIGGER_FALLING, "debug-wakeup", state); if (ret) { pr_err("serial_debugger: " -- GitLab From be00a27cc64562cdc5c7dae7a064cc8ff78197e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Wed, 10 Dec 2008 20:06:28 -0800 Subject: [PATCH 0890/1442] ANDROID: sched: Enable might_sleep before initializing drivers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows detection of init bugs in built-in drivers. Signed-off-by: Arve Hjønnevåg --- kernel/sched/core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 154fd689fe02..79e5e07d3a2c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7703,6 +7703,14 @@ static inline int preempt_count_equals(int preempt_offset) return (nested == preempt_offset); } +static int __might_sleep_init_called; +int __init __might_sleep_init(void) +{ + __might_sleep_init_called = 1; + return 0; +} +early_initcall(__might_sleep_init); + void __might_sleep(const char *file, int line, int preempt_offset) { /* @@ -7728,8 +7736,10 @@ void ___might_sleep(const char *file, int line, int preempt_offset) rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && - !is_idle_task(current)) || - system_state != SYSTEM_RUNNING || oops_in_progress) + !is_idle_task(current)) || oops_in_progress) + return; + if (system_state != SYSTEM_RUNNING && + (!__might_sleep_init_called || system_state != SYSTEM_BOOTING)) return; if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) return; -- GitLab From a5a44472ed524e30b73c00b3d8e1b43f515efc04 Mon Sep 17 00:00:00 2001 From: Riley Andrews Date: Fri, 2 Oct 2015 00:39:53 -0700 Subject: [PATCH 0891/1442] ANDROID: sched: add sched blocked tracepoint which dumps out context of sleep. Decare war on uninterruptible sleep. Add a tracepoint which walks the kernel stack and dumps the first non-scheduler function called before the scheduler is invoked. Change-Id: I19e965d5206329360a92cbfe2afcc8c30f65c229 Signed-off-by: Riley Andrews --- include/trace/events/sched.h | 26 +++++++++++++++++++++++++- kernel/sched/fair.c | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9b90c57517a9..3211890ee7d5 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -219,7 +219,7 @@ DECLARE_EVENT_CLASS(sched_process_template, DEFINE_EVENT(sched_process_template, sched_process_free, TP_PROTO(struct task_struct *p), TP_ARGS(p)); - + /* * Tracepoint for a task exiting: @@ -373,6 +373,30 @@ DEFINE_EVENT(sched_stat_template, sched_stat_blocked, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); +/* + * Tracepoint for recording the cause of uninterruptible sleep. + */ +TRACE_EVENT(sched_blocked_reason, + + TP_PROTO(struct task_struct *tsk), + + TP_ARGS(tsk), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( void*, caller ) + __field( bool, io_wait ) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->caller = (void*)get_wchan(tsk); + __entry->io_wait = tsk->in_iowait; + ), + + TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller) +); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c242944f5cbd..bdb03b0104b7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -937,6 +937,7 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) } trace_sched_stat_blocked(tsk, delta); + trace_sched_blocked_reason(tsk); /* * Blocking time is in units of nanosecs, so shift by -- GitLab From 81863d602de4e248d82baddfcf5076b0cfd2391b Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 19 Sep 2011 16:42:44 -0700 Subject: [PATCH 0892/1442] ANDROID: cpuidle: governor: menu: don't use loadavg get_loadavg doesn't work as intended. According to the comments, it should be returning an average over a few seconds, but it is actually reading the instantaneous load. It is almost always returning 0, but can sometimes, depending on workload, spike very high into the hundreds even when the average cpu load is under 10%. Disable it for now. Change-Id: I63ed100af1cf9463549939b8113ed83676db5f86 Signed-off-by: Colin Cross --- drivers/cpuidle/governors/menu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 03d38c291de6..65bb6fd70439 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -178,7 +178,12 @@ static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned lo /* for higher loadavg, we are more reluctant */ - mult += 2 * get_loadavg(load); + /* + * this doesn't work as intended - it is almost always 0, but can + * sometimes, depending on workload, spike very high into the hundreds + * even when the average cpu load is under 10%. + */ + /* mult += 2 * get_loadavg(); */ /* for IO wait tasks (per cpu!) we add 5x each */ mult += 10 * nr_iowaiters; -- GitLab From 5ee34129c9b2e69d77ec704e1bc004455568ae70 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Wed, 15 Jun 2011 17:21:57 -0700 Subject: [PATCH 0893/1442] ANDROID: Move x86_64 idle notifiers to generic Move the x86_64 idle notifiers originally by Andi Kleen and Venkatesh Pallipadi to generic. Change-Id: Idf29cda15be151f494ff245933c12462643388d5 Acked-by: Nicolas Pitre Signed-off-by: Todd Poynor --- arch/x86/include/asm/idle.h | 7 ------- arch/x86/kernel/process.c | 17 ++--------------- include/linux/cpu.h | 7 +++++++ kernel/cpu.c | 20 ++++++++++++++++++++ 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index c5d1785373ed..02bab09707f2 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -1,13 +1,6 @@ #ifndef _ASM_X86_IDLE_H #define _ASM_X86_IDLE_H -#define IDLE_START 1 -#define IDLE_END 2 - -struct notifier_block; -void idle_notifier_register(struct notifier_block *n); -void idle_notifier_unregister(struct notifier_block *n); - #ifdef CONFIG_X86_64 void enter_idle(void); void exit_idle(void); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0888a879120f..76629f48fdb0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -67,19 +67,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_tss); #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); -static ATOMIC_NOTIFIER_HEAD(idle_notifier); - -void idle_notifier_register(struct notifier_block *n) -{ - atomic_notifier_chain_register(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_unregister); #endif /* @@ -255,14 +242,14 @@ static inline void play_dead(void) void enter_idle(void) { this_cpu_write(is_idle, 1); - atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); + idle_notifier_call_chain(IDLE_START); } static void __exit_idle(void) { if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; - atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); + idle_notifier_call_chain(IDLE_END); } /* Called from interrupts to signify idle end */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index b886dc17f2f3..9f237b657018 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -253,4 +253,11 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#define IDLE_START 1 +#define IDLE_END 2 + +void idle_notifier_register(struct notifier_block *n); +void idle_notifier_unregister(struct notifier_block *n); +void idle_notifier_call_chain(unsigned long val); + #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 29de1a9352c0..4619cc105170 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1932,3 +1932,23 @@ void __init boot_cpu_state_init(void) { per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE; } + +static ATOMIC_NOTIFIER_HEAD(idle_notifier); + +void idle_notifier_register(struct notifier_block *n) +{ + atomic_notifier_chain_register(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_register); + +void idle_notifier_unregister(struct notifier_block *n) +{ + atomic_notifier_chain_unregister(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_unregister); + +void idle_notifier_call_chain(unsigned long val) +{ + atomic_notifier_call_chain(&idle_notifier, val, NULL); +} +EXPORT_SYMBOL_GPL(idle_notifier_call_chain); -- GitLab From 83285ed83fc4975700b68f49246fedf7c9bd86e4 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Wed, 15 Jun 2011 17:44:50 -0700 Subject: [PATCH 0894/1442] ANDROID: ARM: Call idle notifiers Change-Id: Id833e61c13baa1783705ac9e9046d1f0cc90c95e Acked-by: Nicolas Pitre Signed-off-by: Todd Poynor --- arch/arm/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 91d2d5b01414..aae93b483f86 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -80,6 +80,7 @@ void arch_cpu_idle_prepare(void) void arch_cpu_idle_enter(void) { + idle_notifier_call_chain(IDLE_START); ledtrig_cpu(CPU_LED_IDLE_START); #ifdef CONFIG_PL310_ERRATA_769419 wmb(); @@ -89,6 +90,7 @@ void arch_cpu_idle_enter(void) void arch_cpu_idle_exit(void) { ledtrig_cpu(CPU_LED_IDLE_END); + idle_notifier_call_chain(IDLE_END); } void __show_regs(struct pt_regs *regs) -- GitLab From 1bcf88f16e413e97a56ddfcbadc1b2aa0cc6348d Mon Sep 17 00:00:00 2001 From: Erik Gilling Date: Fri, 14 Sep 2012 14:36:34 -0700 Subject: [PATCH 0895/1442] ANDROID: sync: add Documentation/sync.txt Change-Id: Ic7f3a6d4622083be607b82ddd8d676609225bb8f Signed-off-by: Erik Gilling --- Documentation/sync.txt | 75 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 Documentation/sync.txt diff --git a/Documentation/sync.txt b/Documentation/sync.txt new file mode 100644 index 000000000000..a2d05e7fa193 --- /dev/null +++ b/Documentation/sync.txt @@ -0,0 +1,75 @@ +Motivation: + +In complicated DMA pipelines such as graphics (multimedia, camera, gpu, display) +a consumer of a buffer needs to know when the producer has finished producing +it. Likewise the producer needs to know when the consumer is finished with the +buffer so it can reuse it. A particular buffer may be consumed by multiple +consumers which will retain the buffer for different amounts of time. In +addition, a consumer may consume multiple buffers atomically. +The sync framework adds an API which allows synchronization between the +producers and consumers in a generic way while also allowing platforms which +have shared hardware synchronization primitives to exploit them. + +Goals: + * provide a generic API for expressing synchronization dependencies + * allow drivers to exploit hardware synchronization between hardware + blocks + * provide a userspace API that allows a compositor to manage + dependencies. + * provide rich telemetry data to allow debugging slowdowns and stalls of + the graphics pipeline. + +Objects: + * sync_timeline + * sync_pt + * sync_fence + +sync_timeline: + +A sync_timeline is an abstract monotonically increasing counter. In general, +each driver/hardware block context will have one of these. They can be backed +by the appropriate hardware or rely on the generic sw_sync implementation. +Timelines are only ever created through their specific implementations +(i.e. sw_sync.) + +sync_pt: + +A sync_pt is an abstract value which marks a point on a sync_timeline. Sync_pts +have a single timeline parent. They have 3 states: active, signaled, and error. +They start in active state and transition, once, to either signaled (when the +timeline counter advances beyond the sync_pt’s value) or error state. + +sync_fence: + +Sync_fences are the primary primitives used by drivers to coordinate +synchronization of their buffers. They are a collection of sync_pts which may +or may not have the same timeline parent. A sync_pt can only exist in one fence +and the fence's list of sync_pts is immutable once created. Fences can be +waited on synchronously or asynchronously. Two fences can also be merged to +create a third fence containing a copy of the two fences’ sync_pts. Fences are +backed by file descriptors to allow userspace to coordinate the display pipeline +dependencies. + +Use: + +A driver implementing sync support should have a work submission function which: + * takes a fence argument specifying when to begin work + * asynchronously queues that work to kick off when the fence is signaled + * returns a fence to indicate when its work will be done. + * signals the returned fence once the work is completed. + +Consider an imaginary display driver that has the following API: +/* + * assumes buf is ready to be displayed. + * blocks until the buffer is on screen. + */ + void display_buffer(struct dma_buf *buf); + +The new API will become: +/* + * will display buf when fence is signaled. + * returns immediately with a fence that will signal when buf + * is no longer displayed. + */ +struct sync_fence* display_buffer(struct dma_buf *buf, + struct sync_fence *fence); -- GitLab From 88848da21e635d516f910d6ef53ce16232fc8301 Mon Sep 17 00:00:00 2001 From: Jamie Gennis Date: Thu, 21 Feb 2013 17:55:28 -0800 Subject: [PATCH 0896/1442] ANDROID: trace/events: add gpu trace events Change-Id: I0607b9c776acf61cb796b8572cf8cfb8b2dc1377 Signed-off-by: Jamie Gennis --- include/trace/events/gpu.h | 142 +++++++++++++++++++++++++++++++++++++ kernel/trace/Kconfig | 3 + kernel/trace/Makefile | 1 + kernel/trace/gpu-traces.c | 23 ++++++ 4 files changed, 169 insertions(+) create mode 100644 include/trace/events/gpu.h create mode 100644 kernel/trace/gpu-traces.c diff --git a/include/trace/events/gpu.h b/include/trace/events/gpu.h new file mode 100644 index 000000000000..09efa71d66c6 --- /dev/null +++ b/include/trace/events/gpu.h @@ -0,0 +1,142 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu + +#if !defined(_TRACE_GPU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GPU_H + +#include +#include + +#define show_secs_from_ns(ns) \ + ({ \ + u64 t = ns + (NSEC_PER_USEC / 2); \ + do_div(t, NSEC_PER_SEC); \ + }) + +#define show_usecs_from_ns(ns) \ + ({ \ + u64 t = ns + (NSEC_PER_USEC / 2) ; \ + u32 rem; \ + do_div(t, NSEC_PER_USEC); \ + rem = do_div(t, USEC_PER_SEC); \ + }) + +/* + * The gpu_sched_switch event indicates that a switch from one GPU context to + * another occurred on one of the GPU hardware blocks. + * + * The gpu_name argument identifies the GPU hardware block. Each independently + * scheduled GPU hardware block should have a different name. This may be used + * in different ways for different GPUs. For example, if a GPU includes + * multiple processing cores it may use names "GPU 0", "GPU 1", etc. If a GPU + * includes a separately scheduled 2D and 3D hardware block, it might use the + * names "2D" and "3D". + * + * The timestamp argument is the timestamp at which the switch occurred on the + * GPU. These timestamps are in units of nanoseconds and must use + * approximately the same time as sched_clock, though they need not come from + * any CPU clock. The timestamps for a single hardware block must be + * monotonically nondecreasing. This means that if a variable compensation + * offset is used to translate from some other clock to the sched_clock, then + * care must be taken when increasing that offset, and doing so may result in + * multiple events with the same timestamp. + * + * The next_ctx_id argument identifies the next context that was running on + * the GPU hardware block. A value of 0 indicates that the hardware block + * will be idle. + * + * The next_prio argument indicates the priority of the next context at the + * time of the event. The exact numeric values may mean different things for + * different GPUs, but they should follow the rule that lower values indicate a + * higher priority. + * + * The next_job_id argument identifies the batch of work that the GPU will be + * working on. This should correspond to a job_id that was previously traced + * as a gpu_job_enqueue event when the batch of work was created. + */ +TRACE_EVENT(gpu_sched_switch, + + TP_PROTO(const char *gpu_name, u64 timestamp, + u32 next_ctx_id, s32 next_prio, u32 next_job_id), + + TP_ARGS(gpu_name, timestamp, next_ctx_id, next_prio, next_job_id), + + TP_STRUCT__entry( + __string( gpu_name, gpu_name ) + __field( u64, timestamp ) + __field( u32, next_ctx_id ) + __field( s32, next_prio ) + __field( u32, next_job_id ) + ), + + TP_fast_assign( + __assign_str(gpu_name, gpu_name); + __entry->timestamp = timestamp; + __entry->next_ctx_id = next_ctx_id; + __entry->next_prio = next_prio; + __entry->next_job_id = next_job_id; + ), + + TP_printk("gpu_name=%s ts=%5llu.%06lu next_ctx_id=%lu next_prio=%ld " + "next_job_id=%lu", + __get_str(gpu_name), + (unsigned long long)show_secs_from_ns(__entry->timestamp), + (unsigned long)show_usecs_from_ns(__entry->timestamp), + (unsigned long)__entry->next_ctx_id, + (long)__entry->next_prio, + (unsigned long)__entry->next_job_id) +); + +/* + * The gpu_job_enqueue event indicates that a batch of work has been queued up + * to be processed by the GPU. This event is not intended to indicate that + * the batch of work has been submitted to the GPU hardware, but rather that + * it has been submitted to the GPU kernel driver. + * + * This event should be traced on the thread that initiated the work being + * queued. For example, if a batch of work is submitted to the kernel by a + * userland thread, the event should be traced on that thread. + * + * The ctx_id field identifies the GPU context in which the batch of work + * being queued is to be run. + * + * The job_id field identifies the batch of work being queued within the given + * GPU context. The first batch of work submitted for a given GPU context + * should have a job_id of 0, and each subsequent batch of work should + * increment the job_id by 1. + * + * The type field identifies the type of the job being enqueued. The job + * types may be different for different GPU hardware. For example, a GPU may + * differentiate between "2D", "3D", and "compute" jobs. + */ +TRACE_EVENT(gpu_job_enqueue, + + TP_PROTO(u32 ctx_id, u32 job_id, const char *type), + + TP_ARGS(ctx_id, job_id, type), + + TP_STRUCT__entry( + __field( u32, ctx_id ) + __field( u32, job_id ) + __string( type, type ) + ), + + TP_fast_assign( + __entry->ctx_id = ctx_id; + __entry->job_id = job_id; + __assign_str(type, type); + ), + + TP_printk("ctx_id=%lu job_id=%lu type=%s", + (unsigned long)__entry->ctx_id, + (unsigned long)__entry->job_id, + __get_str(type)) +); + +#undef show_secs_from_ns +#undef show_usecs_from_ns + +#endif /* _TRACE_GPU_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2a96b063d659..da5768901a0d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -72,6 +72,9 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool +config GPU_TRACEPOINTS + bool + config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index e57980845549..b4eaf9c9c610 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o +obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/gpu-traces.c b/kernel/trace/gpu-traces.c new file mode 100644 index 000000000000..a4b3f00faee3 --- /dev/null +++ b/kernel/trace/gpu-traces.c @@ -0,0 +1,23 @@ +/* + * GPU tracepoints + * + * Copyright (C) 2013 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include + +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(gpu_sched_switch); +EXPORT_TRACEPOINT_SYMBOL(gpu_job_enqueue); -- GitLab From 6eaff2cd5830a6386ba64e54cabce9c83215acd0 Mon Sep 17 00:00:00 2001 From: Jamie Gennis Date: Wed, 21 Nov 2012 15:04:25 -0800 Subject: [PATCH 0897/1442] ANDROID: trace: Add an option to show tgids in trace output The tgids are tracked along side the saved_cmdlines tracking, and can be included in trace output by enabling the 'print-tgid' trace option. This is useful when doing post-processing of the trace data, as it allows events to be grouped by tgid. Change-Id: I52ed04c3a8ca7fddbb868b792ce5d21ceb76250e Signed-off-by: Jamie Gennis --- kernel/trace/trace.c | 98 +++++++++++++++++++++++++++++++++++-- kernel/trace/trace.h | 4 +- kernel/trace/trace_output.c | 14 +++++- 3 files changed, 110 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8696ce6bf2f6..d658122b7c99 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1597,6 +1597,7 @@ void tracing_reset_all_online_cpus(void) #define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX +static unsigned saved_tgids[SAVED_CMDLINES]; static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; struct saved_cmdlines_buffer { unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; @@ -1835,7 +1836,7 @@ static int trace_save_cmdline(struct task_struct *tsk) } set_cmdline(idx, tsk->comm); - + saved_tgids[idx] = tsk->tgid; arch_spin_unlock(&trace_cmdline_lock); return 1; @@ -1878,6 +1879,25 @@ void trace_find_cmdline(int pid, char comm[]) preempt_enable(); } +int trace_find_tgid(int pid) +{ + unsigned map; + int tgid; + + preempt_disable(); + arch_spin_lock(&trace_cmdline_lock); + map = map_pid_to_cmdline[pid]; + if (map != NO_CMDLINE_MAP) + tgid = saved_tgids[map]; + else + tgid = -1; + + arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); + + return tgid; +} + void tracing_record_cmdline(struct task_struct *tsk) { if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on()) @@ -2920,6 +2940,13 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) "# | | | | |\n"); } +static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m) +{ + print_event_info(buf, m); + seq_puts(m, "# TASK-PID TGID CPU# TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | | | |\n"); +} + static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) { print_event_info(buf, m); @@ -2932,6 +2959,18 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file "# | | | |||| | |\n"); } +static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m) +{ + print_event_info(buf, m); + seq_puts(m, "# _-----=> irqs-off\n"); + seq_puts(m, "# / _----=> need-resched\n"); + seq_puts(m, "# | / _---=> hardirq/softirq\n"); + seq_puts(m, "# || / _--=> preempt-depth\n"); + seq_puts(m, "# ||| / delay\n"); + seq_puts(m, "# TASK-PID TGID CPU# |||| TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | | |||| | |\n"); +} + void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { @@ -3244,9 +3283,15 @@ void trace_default_header(struct seq_file *m) } else { if (!(trace_flags & TRACE_ITER_VERBOSE)) { if (trace_flags & TRACE_ITER_IRQ_INFO) - print_func_help_header_irq(iter->trace_buffer, m); + if (trace_flags & TRACE_ITER_TGID) + print_func_help_header_irq_tgid(iter->trace_buffer, m); + else + print_func_help_header_irq(iter->trace_buffer, m); else - print_func_help_header(iter->trace_buffer, m); + if (trace_flags & TRACE_ITER_TGID) + print_func_help_header_tgid(iter->trace_buffer, m); + else + print_func_help_header(iter->trace_buffer, m); } } } @@ -4581,6 +4626,50 @@ static void trace_insert_enum_map(struct module *mod, trace_insert_enum_map_file(mod, start, len); } +static ssize_t +tracing_saved_tgids_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *file_buf; + char *buf; + int len = 0; + int pid; + int i; + + file_buf = kmalloc(SAVED_CMDLINES*(16+1+16), GFP_KERNEL); + if (!file_buf) + return -ENOMEM; + + buf = file_buf; + + for (i = 0; i < SAVED_CMDLINES; i++) { + int tgid; + int r; + + pid = map_cmdline_to_pid[i]; + if (pid == -1 || pid == NO_CMDLINE_MAP) + continue; + + tgid = trace_find_tgid(pid); + r = sprintf(buf, "%d %d\n", pid, tgid); + buf += r; + len += r; + } + + len = simple_read_from_buffer(ubuf, cnt, ppos, + file_buf, len); + + kfree(file_buf); + + return len; +} + +static const struct file_operations tracing_saved_tgids_fops = { + .open = tracing_open_generic, + .read = tracing_saved_tgids_read, + .llseek = generic_file_llseek, +}; + static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -7214,6 +7303,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("trace_marker", 0220, d_tracer, tr, &tracing_mark_fops); + trace_create_file("saved_tgids", 0444, d_tracer, + tr, &tracing_saved_tgids_fops); + trace_create_file("trace_clock", 0644, d_tracer, tr, &trace_clock_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fd24b1f9ac43..250ebf465829 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -690,6 +690,7 @@ extern cycle_t ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); extern void trace_event_follow_fork(struct trace_array *tr, bool enable); +extern int trace_find_tgid(int pid); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; @@ -1007,7 +1008,8 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ - BRANCH_FLAGS + BRANCH_FLAGS \ + C(TGID, "print-tgid"), /* * By defining C, we can make TRACE_FLAGS a list of bit names diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 3fc20422c166..d08f51822f66 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -530,11 +530,21 @@ int trace_print_context(struct trace_iterator *iter) unsigned long long t; unsigned long secs, usec_rem; char comm[TASK_COMM_LEN]; + int tgid; trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d [%03d] ", - comm, entry->pid, iter->cpu); + trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + + if (tr->trace_flags & TRACE_ITER_TGID) { + tgid = trace_find_tgid(entry->pid); + if (tgid < 0) + trace_seq_puts(s, "(-----) "); + else + trace_seq_printf(s, "(%5d) ", tgid); + } + + trace_seq_printf(s, "[%03d] ", iter->cpu); if (tr->trace_flags & TRACE_ITER_IRQ_INFO) trace_print_lat_fmt(s, entry); -- GitLab From 7ee9aa44282643480974cbfe7417469963a7c725 Mon Sep 17 00:00:00 2001 From: Jamie Gennis Date: Wed, 21 Nov 2012 20:14:09 -0800 Subject: [PATCH 0898/1442] ANDROID: trace: add non-hierarchical function_graph option Add the 'funcgraph-flat' option to the function_graph tracer to use the default trace printing format rather than the hierarchical formatting normally used. Change-Id: If2900bfb86e6f8f51379f56da4f6fabafa630909 Signed-off-by: Jamie Gennis --- Documentation/trace/ftrace.txt | 29 +++++ kernel/trace/trace_functions_graph.c | 43 +++---- kernel/trace/trace_output.c | 164 +++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 30 deletions(-) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 185c39fea2a0..e20aacb9a6e8 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -2102,6 +2102,35 @@ will produce: 1) 1.449 us | } +You can disable the hierarchical function call formatting and instead print a +flat list of function entry and return events. This uses the format described +in the Output Formatting section and respects all the trace options that +control that formatting. Hierarchical formatting is the default. + + hierachical: echo nofuncgraph-flat > trace_options + flat: echo funcgraph-flat > trace_options + + ie: + + # tracer: function_graph + # + # entries-in-buffer/entries-written: 68355/68355 #P:2 + # + # _-----=> irqs-off + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / delay + # TASK-PID CPU# |||| TIMESTAMP FUNCTION + # | | | |||| | | + sh-1806 [001] d... 198.843443: graph_ent: func=_raw_spin_lock + sh-1806 [001] d... 198.843445: graph_ent: func=__raw_spin_lock + sh-1806 [001] d..1 198.843447: graph_ret: func=__raw_spin_lock + sh-1806 [001] d..1 198.843449: graph_ret: func=_raw_spin_lock + sh-1806 [001] d..1 198.843451: graph_ent: func=_raw_spin_unlock_irqrestore + sh-1806 [001] d... 198.843453: graph_ret: func=_raw_spin_unlock_irqrestore + + You might find other useful features for this tracer in the following "dynamic ftrace" section such as tracing only specific functions or tasks. diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4e480e870474..305f535e24ee 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -65,6 +65,9 @@ struct fgraph_data { #define TRACE_GRAPH_INDENT 2 +/* Flag options */ +#define TRACE_GRAPH_PRINT_FLAT 0x80 + static unsigned int max_depth; static struct tracer_opt trace_opts[] = { @@ -88,6 +91,8 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) }, /* Include time within nested functions */ { TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) }, + /* Use standard trace formatting rather than hierarchical */ + { TRACER_OPT(funcgraph-flat, TRACE_GRAPH_PRINT_FLAT) }, { } /* Empty entry */ }; @@ -1232,6 +1237,9 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) int cpu = iter->cpu; int ret; + if (flags & TRACE_GRAPH_PRINT_FLAT) + return TRACE_TYPE_UNHANDLED; + if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) { per_cpu_ptr(data->cpu_data, cpu)->ignore = 0; return TRACE_TYPE_HANDLED; @@ -1289,13 +1297,6 @@ print_graph_function(struct trace_iterator *iter) return print_graph_function_flags(iter, tracer_flags.val); } -static enum print_line_t -print_graph_function_event(struct trace_iterator *iter, int flags, - struct trace_event *event) -{ - return print_graph_function(iter); -} - static void print_lat_header(struct seq_file *s, u32 flags) { static const char spaces[] = " " /* 16 spaces */ @@ -1364,6 +1365,11 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) struct trace_iterator *iter = s->private; struct trace_array *tr = iter->tr; + if (flags & TRACE_GRAPH_PRINT_FLAT) { + trace_default_header(s); + return; + } + if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO)) return; @@ -1445,19 +1451,6 @@ func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) return 0; } -static struct trace_event_functions graph_functions = { - .trace = print_graph_function_event, -}; - -static struct trace_event graph_trace_entry_event = { - .type = TRACE_GRAPH_ENT, - .funcs = &graph_functions, -}; - -static struct trace_event graph_trace_ret_event = { - .type = TRACE_GRAPH_RET, - .funcs = &graph_functions -}; static struct tracer graph_trace __tracer_data = { .name = "function_graph", @@ -1534,16 +1527,6 @@ static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); - if (!register_trace_event(&graph_trace_entry_event)) { - pr_warn("Warning: could not register graph trace events\n"); - return 1; - } - - if (!register_trace_event(&graph_trace_ret_event)) { - pr_warn("Warning: could not register graph trace events\n"); - return 1; - } - return register_tracer(&graph_trace); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d08f51822f66..fb44e2027a8f 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -859,6 +859,168 @@ static struct trace_event trace_fn_event = { .funcs = &trace_fn_funcs, }; +/* TRACE_GRAPH_ENT */ +static enum print_line_t trace_graph_ent_trace(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct trace_seq *s = &iter->seq; + struct ftrace_graph_ent_entry *field; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_puts(s, "graph_ent: func=")) + return TRACE_TYPE_PARTIAL_LINE; + + if (!seq_print_ip_sym(s, field->graph_ent.func, flags)) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_puts(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_printf(&iter->seq, "%lx %d\n", + field->graph_ent.func, + field->graph_ent.depth)) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.func); + SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.depth); + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_bin(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_FIELD_RET(s, field->graph_ent.func); + SEQ_PUT_FIELD_RET(s, field->graph_ent.depth); + + return TRACE_TYPE_HANDLED; +} + +static struct trace_event_functions trace_graph_ent_funcs = { + .trace = trace_graph_ent_trace, + .raw = trace_graph_ent_raw, + .hex = trace_graph_ent_hex, + .binary = trace_graph_ent_bin, +}; + +static struct trace_event trace_graph_ent_event = { + .type = TRACE_GRAPH_ENT, + .funcs = &trace_graph_ent_funcs, +}; + +/* TRACE_GRAPH_RET */ +static enum print_line_t trace_graph_ret_trace(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_graph_ret_entry *field; + + trace_assign_type(field, entry); + + if (!trace_seq_puts(s, "graph_ret: func=")) + return TRACE_TYPE_PARTIAL_LINE; + + if (!seq_print_ip_sym(s, field->ret.func, flags)) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_puts(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n", + field->ret.func, + field->ret.calltime, + field->ret.rettime, + field->ret.overrun, + field->ret.depth)); + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_HEX_FIELD_RET(s, field->ret.func); + SEQ_PUT_HEX_FIELD_RET(s, field->ret.calltime); + SEQ_PUT_HEX_FIELD_RET(s, field->ret.rettime); + SEQ_PUT_HEX_FIELD_RET(s, field->ret.overrun); + SEQ_PUT_HEX_FIELD_RET(s, field->ret.depth); + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_bin(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_FIELD_RET(s, field->ret.func); + SEQ_PUT_FIELD_RET(s, field->ret.calltime); + SEQ_PUT_FIELD_RET(s, field->ret.rettime); + SEQ_PUT_FIELD_RET(s, field->ret.overrun); + SEQ_PUT_FIELD_RET(s, field->ret.depth); + + return TRACE_TYPE_HANDLED; +} + +static struct trace_event_functions trace_graph_ret_funcs = { + .trace = trace_graph_ret_trace, + .raw = trace_graph_ret_raw, + .hex = trace_graph_ret_hex, + .binary = trace_graph_ret_bin, +}; + +static struct trace_event trace_graph_ret_event = { + .type = TRACE_GRAPH_RET, + .funcs = &trace_graph_ret_funcs, +}; + /* TRACE_CTX an TRACE_WAKE */ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, char *delim) @@ -1301,6 +1463,8 @@ static struct trace_event trace_print_event = { static struct trace_event *events[] __initdata = { &trace_fn_event, + &trace_graph_ent_event, + &trace_graph_ret_event, &trace_ctx_event, &trace_wake_event, &trace_stack_event, -- GitLab From dbd4bf94528ee3c645b277cf0f328880e4d13818 Mon Sep 17 00:00:00 2001 From: Jamie Gennis Date: Thu, 18 Apr 2013 20:36:21 -0700 Subject: [PATCH 0899/1442] ANDROID: trace/events: fix gpu event timestamp formatting This change fixes the how the gpu_sched_switch timestamp field is formatted. Signed-off-by: Jamie Gennis Change-Id: I273234935254ed15772c9e561c9af20e480004ae --- include/trace/events/gpu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/events/gpu.h b/include/trace/events/gpu.h index 09efa71d66c6..7e15cdfafe5a 100644 --- a/include/trace/events/gpu.h +++ b/include/trace/events/gpu.h @@ -11,6 +11,7 @@ ({ \ u64 t = ns + (NSEC_PER_USEC / 2); \ do_div(t, NSEC_PER_SEC); \ + t; \ }) #define show_usecs_from_ns(ns) \ @@ -77,7 +78,7 @@ TRACE_EVENT(gpu_sched_switch, __entry->next_job_id = next_job_id; ), - TP_printk("gpu_name=%s ts=%5llu.%06lu next_ctx_id=%lu next_prio=%ld " + TP_printk("gpu_name=%s ts=%llu.%06lu next_ctx_id=%lu next_prio=%ld " "next_job_id=%lu", __get_str(gpu_name), (unsigned long long)show_secs_from_ns(__entry->timestamp), -- GitLab From b96956e7a5de59d50ab6711aabe62e53fc2edb13 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 28 Oct 2015 10:45:04 -0700 Subject: [PATCH 0900/1442] ANDROID: trace: fix compilation for 4.1 Change-Id: Id88b5d30847bc6d3cfe1d8cd00cbdc975c9712d1 Signed-off-by: Dmitry Shmidt --- kernel/trace/trace.c | 10 ++++---- kernel/trace/trace_output.c | 50 +++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d658122b7c99..ca5cf512e57a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1597,7 +1597,7 @@ void tracing_reset_all_online_cpus(void) #define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX -static unsigned saved_tgids[SAVED_CMDLINES]; +static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT]; static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; struct saved_cmdlines_buffer { unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; @@ -1886,7 +1886,7 @@ int trace_find_tgid(int pid) preempt_disable(); arch_spin_lock(&trace_cmdline_lock); - map = map_pid_to_cmdline[pid]; + map = savedcmd->map_pid_to_cmdline[pid]; if (map != NO_CMDLINE_MAP) tgid = saved_tgids[map]; else @@ -4636,17 +4636,17 @@ tracing_saved_tgids_read(struct file *file, char __user *ubuf, int pid; int i; - file_buf = kmalloc(SAVED_CMDLINES*(16+1+16), GFP_KERNEL); + file_buf = kmalloc(SAVED_CMDLINES_DEFAULT*(16+1+16), GFP_KERNEL); if (!file_buf) return -ENOMEM; buf = file_buf; - for (i = 0; i < SAVED_CMDLINES; i++) { + for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) { int tgid; int r; - pid = map_cmdline_to_pid[i]; + pid = savedcmd->map_cmdline_to_pid[i]; if (pid == -1 || pid == NO_CMDLINE_MAP) continue; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index fb44e2027a8f..034675950649 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -868,13 +868,15 @@ static enum print_line_t trace_graph_ent_trace(struct trace_iterator *iter, int trace_assign_type(field, iter->ent); - if (!trace_seq_puts(s, "graph_ent: func=")) + trace_seq_puts(s, "graph_ent: func="); + if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; if (!seq_print_ip_sym(s, field->graph_ent.func, flags)) return TRACE_TYPE_PARTIAL_LINE; - if (!trace_seq_puts(s, "\n")) + trace_seq_puts(s, "\n"); + if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; return TRACE_TYPE_HANDLED; @@ -887,9 +889,10 @@ static enum print_line_t trace_graph_ent_raw(struct trace_iterator *iter, int fl trace_assign_type(field, iter->ent); - if (!trace_seq_printf(&iter->seq, "%lx %d\n", + trace_seq_printf(&iter->seq, "%lx %d\n", field->graph_ent.func, - field->graph_ent.depth)) + field->graph_ent.depth); + if (trace_seq_has_overflowed(&iter->seq)) return TRACE_TYPE_PARTIAL_LINE; return TRACE_TYPE_HANDLED; @@ -903,8 +906,8 @@ static enum print_line_t trace_graph_ent_hex(struct trace_iterator *iter, int fl trace_assign_type(field, iter->ent); - SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.func); - SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.depth); + SEQ_PUT_HEX_FIELD(s, field->graph_ent.func); + SEQ_PUT_HEX_FIELD(s, field->graph_ent.depth); return TRACE_TYPE_HANDLED; } @@ -917,8 +920,8 @@ static enum print_line_t trace_graph_ent_bin(struct trace_iterator *iter, int fl trace_assign_type(field, iter->ent); - SEQ_PUT_FIELD_RET(s, field->graph_ent.func); - SEQ_PUT_FIELD_RET(s, field->graph_ent.depth); + SEQ_PUT_FIELD(s, field->graph_ent.func); + SEQ_PUT_FIELD(s, field->graph_ent.depth); return TRACE_TYPE_HANDLED; } @@ -945,13 +948,15 @@ static enum print_line_t trace_graph_ret_trace(struct trace_iterator *iter, int trace_assign_type(field, entry); - if (!trace_seq_puts(s, "graph_ret: func=")) + trace_seq_puts(s, "graph_ret: func="); + if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; if (!seq_print_ip_sym(s, field->ret.func, flags)) return TRACE_TYPE_PARTIAL_LINE; - if (!trace_seq_puts(s, "\n")) + trace_seq_puts(s, "\n"); + if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; return TRACE_TYPE_HANDLED; @@ -964,12 +969,13 @@ static enum print_line_t trace_graph_ret_raw(struct trace_iterator *iter, int fl trace_assign_type(field, iter->ent); - if (!trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n", + trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n", field->ret.func, field->ret.calltime, field->ret.rettime, field->ret.overrun, - field->ret.depth)); + field->ret.depth); + if (trace_seq_has_overflowed(&iter->seq)) return TRACE_TYPE_PARTIAL_LINE; return TRACE_TYPE_HANDLED; @@ -983,11 +989,11 @@ static enum print_line_t trace_graph_ret_hex(struct trace_iterator *iter, int fl trace_assign_type(field, iter->ent); - SEQ_PUT_HEX_FIELD_RET(s, field->ret.func); - SEQ_PUT_HEX_FIELD_RET(s, field->ret.calltime); - SEQ_PUT_HEX_FIELD_RET(s, field->ret.rettime); - SEQ_PUT_HEX_FIELD_RET(s, field->ret.overrun); - SEQ_PUT_HEX_FIELD_RET(s, field->ret.depth); + SEQ_PUT_HEX_FIELD(s, field->ret.func); + SEQ_PUT_HEX_FIELD(s, field->ret.calltime); + SEQ_PUT_HEX_FIELD(s, field->ret.rettime); + SEQ_PUT_HEX_FIELD(s, field->ret.overrun); + SEQ_PUT_HEX_FIELD(s, field->ret.depth); return TRACE_TYPE_HANDLED; } @@ -1000,11 +1006,11 @@ static enum print_line_t trace_graph_ret_bin(struct trace_iterator *iter, int fl trace_assign_type(field, iter->ent); - SEQ_PUT_FIELD_RET(s, field->ret.func); - SEQ_PUT_FIELD_RET(s, field->ret.calltime); - SEQ_PUT_FIELD_RET(s, field->ret.rettime); - SEQ_PUT_FIELD_RET(s, field->ret.overrun); - SEQ_PUT_FIELD_RET(s, field->ret.depth); + SEQ_PUT_FIELD(s, field->ret.func); + SEQ_PUT_FIELD(s, field->ret.calltime); + SEQ_PUT_FIELD(s, field->ret.rettime); + SEQ_PUT_FIELD(s, field->ret.overrun); + SEQ_PUT_FIELD(s, field->ret.depth); return TRACE_TYPE_HANDLED; } -- GitLab From 83707ea3db41ddad347d5c2b6d9ad5b8aaba8518 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 19 Nov 2015 16:07:19 -0800 Subject: [PATCH 0901/1442] ANDROID: trace: cpufreq: Add tracing for min/max cpufreq Change-Id: I73f6ec437c1f805437d9376abb6510d1364b07ec Signed-off-by: Ruchi Kandoi --- Documentation/trace/events-power.txt | 1 + drivers/cpufreq/cpufreq.c | 1 + include/trace/events/power.h | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt index 21d514ced212..4d817d5acc40 100644 --- a/Documentation/trace/events-power.txt +++ b/Documentation/trace/events-power.txt @@ -25,6 +25,7 @@ cpufreq. cpu_idle "state=%lu cpu_id=%lu" cpu_frequency "state=%lu cpu_id=%lu" +cpu_frequency_limits "min=%lu max=%lu cpu_id=%lu" A suspend event is used to indicate the system going in and out of the suspend mode: diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6e6c1fb60fbc..c910111621d7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2194,6 +2194,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, policy->min = new_policy->min; policy->max = new_policy->max; + trace_cpu_frequency_limits(policy->max, policy->min, policy->cpu); policy->cached_target_freq = UINT_MAX; diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 54e3aad32806..3d545204897b 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -147,6 +147,31 @@ DEFINE_EVENT(cpu, cpu_frequency, TP_ARGS(frequency, cpu_id) ); +TRACE_EVENT(cpu_frequency_limits, + + TP_PROTO(unsigned int max_freq, unsigned int min_freq, + unsigned int cpu_id), + + TP_ARGS(max_freq, min_freq, cpu_id), + + TP_STRUCT__entry( + __field( u32, min_freq ) + __field( u32, max_freq ) + __field( u32, cpu_id ) + ), + + TP_fast_assign( + __entry->min_freq = min_freq; + __entry->max_freq = min_freq; + __entry->cpu_id = cpu_id; + ), + + TP_printk("min=%lu max=%lu cpu_id=%lu", + (unsigned long)__entry->min_freq, + (unsigned long)__entry->max_freq, + (unsigned long)__entry->cpu_id) +); + TRACE_EVENT(device_pm_callback_start, TP_PROTO(struct device *dev, const char *pm_ops, int event), -- GitLab From b354d8ad262dbf8ec385dfe35dae53ebd7282d07 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 9 May 2012 16:09:50 -0700 Subject: [PATCH 0902/1442] ANDROID: trace: power: add trace_clock_set_parent Adds a new trace event to be called from clk_set_parent. Some cpufreq drivers, including Tegra, reparent the cpu clock to a slower clock while the main pll is relocking, tracing clk_set_parent allows traces to show how for long the cpu is running slower. Uses a separate TRACE_EVENT instead of the clock event class to allow the event to contain string names for the child and the parent. Signed-off-by: Colin Cross --- include/trace/events/power.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 3d545204897b..ed0919edd1b6 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -325,6 +325,25 @@ DEFINE_EVENT(clock, clock_set_rate, TP_ARGS(name, state, cpu_id) ); +TRACE_EVENT(clock_set_parent, + + TP_PROTO(const char *name, const char *parent_name), + + TP_ARGS(name, parent_name), + + TP_STRUCT__entry( + __string( name, name ) + __string( parent_name, parent_name ) + ), + + TP_fast_assign( + __assign_str(name, name); + __assign_str(parent_name, parent_name); + ), + + TP_printk("%s parent=%s", __get_str(name), __get_str(parent_name)) +); + /* * The power domain events are used for power domains transitions */ -- GitLab From c9864378a2c61c757cf83774c435f84f21aba85f Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Tue, 3 Jul 2012 15:41:20 -0700 Subject: [PATCH 0903/1442] ANDROID: power_supply: Add custom property for USB High Current mode For smb347. Change-Id: I3323469072e1ee5085d61af8a89612b06b91f94a Signed-off-by: Todd Poynor --- drivers/power/supply/power_supply_sysfs.c | 2 ++ include/linux/power_supply.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index bcde8d13476a..3dad0a5b36f1 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -202,6 +202,8 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), POWER_SUPPLY_ATTR(serial_number), + /* Local extensions */ + POWER_SUPPLY_ATTR(usb_hc), }; static struct attribute * diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 3965503315ef..ef5be8097158 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -152,6 +152,8 @@ enum power_supply_property { POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, POWER_SUPPLY_PROP_SERIAL_NUMBER, + /* Local extensions */ + POWER_SUPPLY_PROP_USB_HC, }; enum power_supply_type { -- GitLab From af4f6ce7f57ba9d517f4ce14667fb2be2f15e5ba Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Thu, 12 Jul 2012 20:27:16 -0700 Subject: [PATCH 0904/1442] ANDROID: power: power_supply: move POWER_SUPPLY_PROP_USB_HC to type 'int' order Change-Id: I001af30ab5fe06dde5f368241f21b9e0864777a1 Signed-off-by: Todd Poynor --- drivers/power/supply/power_supply_sysfs.c | 4 ++-- include/linux/power_supply.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 3dad0a5b36f1..23a2bef106d0 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -198,12 +198,12 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(scope), POWER_SUPPLY_ATTR(charge_term_current), POWER_SUPPLY_ATTR(calibrate), + /* Local extensions */ + POWER_SUPPLY_ATTR(usb_hc), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), POWER_SUPPLY_ATTR(serial_number), - /* Local extensions */ - POWER_SUPPLY_ATTR(usb_hc), }; static struct attribute * diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index ef5be8097158..11d4b1c82663 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -148,12 +148,12 @@ enum power_supply_property { POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, POWER_SUPPLY_PROP_CALIBRATE, + /* Local extensions */ + POWER_SUPPLY_PROP_USB_HC, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, POWER_SUPPLY_PROP_SERIAL_NUMBER, - /* Local extensions */ - POWER_SUPPLY_PROP_USB_HC, }; enum power_supply_type { -- GitLab From a02dedd9a77f679c8d24961b8fc6166d1b518e72 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Fri, 13 Jul 2012 13:30:04 -0700 Subject: [PATCH 0905/1442] ANDROID: power: power_supply: add POWER_SUPPLY_PROP_USB_OTG Change-Id: Idfc6ef2e37d62aad6f26cc8eafa53db642cd352b Signed-off-by: Todd Poynor --- drivers/power/supply/power_supply_sysfs.c | 1 + include/linux/power_supply.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 23a2bef106d0..453e863388f0 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -200,6 +200,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(calibrate), /* Local extensions */ POWER_SUPPLY_ATTR(usb_hc), + POWER_SUPPLY_ATTR(usb_otg), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 11d4b1c82663..9d307ff064cb 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -150,6 +150,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_CALIBRATE, /* Local extensions */ POWER_SUPPLY_PROP_USB_HC, + POWER_SUPPLY_PROP_USB_OTG, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, -- GitLab From 635a06d6a69b314e07249ec863fb055da93022b4 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Wed, 18 Jul 2012 16:28:50 -0700 Subject: [PATCH 0906/1442] ANDROID: power: power_supply: add POWER_SUPPLY_PROP_CHARGE_ENABLED Change-Id: I3e93b502452811cbfc4d904202b4f1d94edc143d Signed-off-by: Todd Poynor --- drivers/power/supply/power_supply_sysfs.c | 1 + include/linux/power_supply.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 453e863388f0..f4ad43d820dd 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -201,6 +201,7 @@ static struct device_attribute power_supply_attrs[] = { /* Local extensions */ POWER_SUPPLY_ATTR(usb_hc), POWER_SUPPLY_ATTR(usb_otg), + POWER_SUPPLY_ATTR(charge_enabled), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 9d307ff064cb..1a7e4f02e3ea 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -151,6 +151,7 @@ enum power_supply_property { /* Local extensions */ POWER_SUPPLY_PROP_USB_HC, POWER_SUPPLY_PROP_USB_OTG, + POWER_SUPPLY_PROP_CHARGE_ENABLED, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, -- GitLab From 25c6a63c469f80f7d78a6e971fb4252696841c4f Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Tue, 29 May 2012 17:33:56 -0700 Subject: [PATCH 0907/1442] ANDROID: PM / Suspend: Print wall time at suspend entry and exit Change-Id: I92f252414c013b018b9a392eae1ee039aa0e89dc Signed-off-by: Todd Poynor --- kernel/power/suspend.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 6ccb08f57fcb..028d2bee62b0 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -527,6 +528,18 @@ static int enter_state(suspend_state_t state) return error; } +static void pm_suspend_marker(char *annotation) +{ + struct timespec ts; + struct rtc_time tm; + + getnstimeofday(&ts); + rtc_time_to_tm(ts.tv_sec, &tm); + pr_info("PM: suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n", + annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec); +} + /** * pm_suspend - Externally visible function for suspending the system. * @state: System sleep state to enter. @@ -541,6 +554,7 @@ int pm_suspend(suspend_state_t state) if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) return -EINVAL; + pm_suspend_marker("entry"); error = enter_state(state); if (error) { suspend_stats.fail++; @@ -548,6 +562,7 @@ int pm_suspend(suspend_state_t state) } else { suspend_stats.success++; } + pm_suspend_marker("exit"); return error; } EXPORT_SYMBOL(pm_suspend); -- GitLab From 6acefbee87ffaebacf4ee7ac2fc52a6117f9a40a Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Wed, 19 Feb 2014 15:30:47 -0800 Subject: [PATCH 0908/1442] ANDROID: Power: add an API to log wakeup reasons Add API log_wakeup_reason() and expose it to userspace via sysfs path /sys/kernel/wakeup_reasons/last_resume_reason Change-Id: I81addaf420f1338255c5d0638b0d244a99d777d1 Signed-off-by: Ruchi Kandoi --- include/linux/wakeup_reason.h | 23 ++++++ kernel/power/Makefile | 2 + kernel/power/wakeup_reason.c | 132 ++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 include/linux/wakeup_reason.h create mode 100644 kernel/power/wakeup_reason.c diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h new file mode 100644 index 000000000000..7ce50f0debc4 --- /dev/null +++ b/include/linux/wakeup_reason.h @@ -0,0 +1,23 @@ +/* + * include/linux/wakeup_reason.h + * + * Logs the reason which caused the kernel to resume + * from the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_WAKEUP_REASON_H +#define _LINUX_WAKEUP_REASON_H + +void log_wakeup_reason(int irq); + +#endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/kernel/power/Makefile b/kernel/power/Makefile index eb4f717705ba..80578f272be4 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -14,3 +14,5 @@ obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + +obj-$(CONFIG_SUSPEND) += wakeup_reason.o diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c new file mode 100644 index 000000000000..ae9bfece9d9a --- /dev/null +++ b/kernel/power/wakeup_reason.c @@ -0,0 +1,132 @@ +/* + * kernel/power/wakeup_reason.c + * + * Logs the reasons which caused the kernel to resume from + * the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define MAX_WAKEUP_REASON_IRQS 32 +static int irq_list[MAX_WAKEUP_REASON_IRQS]; +static int irq_count; +static struct kobject *wakeup_reason; +static spinlock_t resume_reason_lock; + +static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int irq_no, buf_offset = 0; + struct irq_desc *desc; + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irq_count; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } + spin_unlock(&resume_reason_lock); + return buf_offset; +} + +static struct kobj_attribute resume_reason = __ATTR(last_resume_reason, 0666, + reason_show, NULL); + +static struct attribute *attrs[] = { + &resume_reason.attr, + NULL, +}; +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +/* + * logs all the wake up reasons to the kernel + * stores the irqs to expose them to the userspace via sysfs + */ +void log_wakeup_reason(int irq) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, + desc->action->name); + else + printk(KERN_INFO "Resume caused by IRQ %d\n", irq); + + spin_lock(&resume_reason_lock); + irq_list[irq_count++] = irq; + spin_unlock(&resume_reason_lock); +} + +/* Detects a suspend and clears all the previous wake up reasons*/ +static int wakeup_reason_pm_event(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + switch (pm_event) { + case PM_SUSPEND_PREPARE: + spin_lock(&resume_reason_lock); + irq_count = 0; + spin_unlock(&resume_reason_lock); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block wakeup_reason_pm_notifier_block = { + .notifier_call = wakeup_reason_pm_event, +}; + +/* Initializes the sysfs parameter + * registers the pm_event notifier + */ +void __init wakeup_reason_init(void) +{ + int retval; + spin_lock_init(&resume_reason_lock); + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); + if (retval) + printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", + __func__, retval); + + wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!wakeup_reason) { + printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", + __func__); + return; + } + retval = sysfs_create_group(wakeup_reason, &attr_group); + if (retval) { + kobject_put(wakeup_reason); + printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", + __func__, retval); + } +} + +late_initcall(wakeup_reason_init); -- GitLab From 1135122a192aea0d55c4006af322dd7bd0aefbb5 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 20 Feb 2014 19:47:38 -0800 Subject: [PATCH 0909/1442] ANDROID: POWER: fix compile warnings in log_wakeup_reason Change I81addaf420f1338255c5d0638b0d244a99d777d1 introduced compile warnings, fix these. Change-Id: I05482a5335599ab96c0a088a7d175c8d4cf1cf69 Signed-off-by: Ruchi Kandoi --- kernel/power/wakeup_reason.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index ae9bfece9d9a..82e69fe52d0b 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -35,7 +35,7 @@ static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t count) + char *buf) { int irq_no, buf_offset = 0; struct irq_desc *desc; @@ -106,7 +106,7 @@ static struct notifier_block wakeup_reason_pm_notifier_block = { /* Initializes the sysfs parameter * registers the pm_event notifier */ -void __init wakeup_reason_init(void) +int __init wakeup_reason_init(void) { int retval; spin_lock_init(&resume_reason_lock); @@ -119,7 +119,7 @@ void __init wakeup_reason_init(void) if (!wakeup_reason) { printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", __func__); - return; + return 1; } retval = sysfs_create_group(wakeup_reason, &attr_group); if (retval) { @@ -127,6 +127,7 @@ void __init wakeup_reason_init(void) printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", __func__, retval); } + return 0; } late_initcall(wakeup_reason_init); -- GitLab From 1ceb7e26759ee19f0ef6ca6f087e1b01aa3355f5 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 7 Mar 2014 12:54:30 -0800 Subject: [PATCH 0910/1442] ANDROID: Power: Add guard condition for maximum wakeup reasons Ensure the array for the wakeup reason IRQs does not overflow. Change-Id: Iddc57a3aeb1888f39d4e7b004164611803a4d37c Signed-off-by: Ruchi Kandoi (cherry picked from commit b5ea40cdfcf38296535f931a7e5e7bf47b6fad7f) --- kernel/power/wakeup_reason.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 82e69fe52d0b..caf44213b14c 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -79,6 +79,13 @@ void log_wakeup_reason(int irq) printk(KERN_INFO "Resume caused by IRQ %d\n", irq); spin_lock(&resume_reason_lock); + if (irq_count == MAX_WAKEUP_REASON_IRQS) { + spin_unlock(&resume_reason_lock); + printk(KERN_WARNING "Resume caused by more than %d IRQs\n", + MAX_WAKEUP_REASON_IRQS); + return; + } + irq_list[irq_count++] = irq; spin_unlock(&resume_reason_lock); } -- GitLab From e13dbc7c69cd939c66d7fd80e96ccb0f97304b03 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 10 Mar 2014 14:21:30 -0700 Subject: [PATCH 0911/1442] ANDROID: power: wakeup_reason: rename irq_count to irqcount On x86, irq_count conflicts with a declaration in arch/x86/include/asm/processor.h Change-Id: I3e4fde0ff64ef59ff5ed2adc0ea3a644641ee0b7 Signed-off-by: Greg Hackmann --- kernel/power/wakeup_reason.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index caf44213b14c..188a6bfacf5a 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -30,7 +30,7 @@ #define MAX_WAKEUP_REASON_IRQS 32 static int irq_list[MAX_WAKEUP_REASON_IRQS]; -static int irq_count; +static int irqcount; static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; @@ -40,7 +40,7 @@ static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, int irq_no, buf_offset = 0; struct irq_desc *desc; spin_lock(&resume_reason_lock); - for (irq_no = 0; irq_no < irq_count; irq_no++) { + for (irq_no = 0; irq_no < irqcount; irq_no++) { desc = irq_to_desc(irq_list[irq_no]); if (desc && desc->action && desc->action->name) buf_offset += sprintf(buf + buf_offset, "%d %s\n", @@ -79,14 +79,14 @@ void log_wakeup_reason(int irq) printk(KERN_INFO "Resume caused by IRQ %d\n", irq); spin_lock(&resume_reason_lock); - if (irq_count == MAX_WAKEUP_REASON_IRQS) { + if (irqcount == MAX_WAKEUP_REASON_IRQS) { spin_unlock(&resume_reason_lock); printk(KERN_WARNING "Resume caused by more than %d IRQs\n", MAX_WAKEUP_REASON_IRQS); return; } - irq_list[irq_count++] = irq; + irq_list[irqcount++] = irq; spin_unlock(&resume_reason_lock); } @@ -97,7 +97,7 @@ static int wakeup_reason_pm_event(struct notifier_block *notifier, switch (pm_event) { case PM_SUSPEND_PREPARE: spin_lock(&resume_reason_lock); - irq_count = 0; + irqcount = 0; spin_unlock(&resume_reason_lock); break; default: -- GitLab From b4e6247778b0ba611812776821ad14e3e7d4aa10 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Thu, 24 Apr 2014 14:31:57 -0700 Subject: [PATCH 0912/1442] ANDROID: Power: Changes the permission to read only for sysfs file /sys/kernel/wakeup_reasons/last_resume_reason Change-Id: I8ac568a7cb58c31decd379195de517ff3c6f9c65 Signed-off-by: Ruchi Kandoi --- kernel/power/wakeup_reason.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 188a6bfacf5a..187e4e9105fb 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -34,7 +34,7 @@ static int irqcount; static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; -static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, +static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int irq_no, buf_offset = 0; @@ -53,8 +53,7 @@ static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, return buf_offset; } -static struct kobj_attribute resume_reason = __ATTR(last_resume_reason, 0666, - reason_show, NULL); +static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); static struct attribute *attrs[] = { &resume_reason.attr, -- GitLab From 64062b35c3a2d1b15d2a49db8479ad3084898d2e Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Thu, 12 Dec 2013 15:59:09 -0800 Subject: [PATCH 0913/1442] ANDROID: power: Add property CHARGE_COUNTER_EXT and 64-bit precision properties Add POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT that stores accumulated charge in nAh units as a signed 64-bit value. Add generic support for signed 64-bit property values. Change-Id: I2bd34b1e95ffba24e7bfef81f398f22bd2aaf05e Signed-off-by: Todd Poynor --- drivers/power/supply/power_supply_sysfs.c | 7 ++++++- include/linux/power_supply.h | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index f4ad43d820dd..fdb824fdf6c1 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -107,7 +107,10 @@ static ssize_t power_supply_show_property(struct device *dev, else if (off >= POWER_SUPPLY_PROP_MODEL_NAME) return sprintf(buf, "%s\n", value.strval); - return sprintf(buf, "%d\n", value.intval); + if (off == POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT) + return sprintf(buf, "%lld\n", value.int64val); + else + return sprintf(buf, "%d\n", value.intval); } static ssize_t power_supply_store_property(struct device *dev, @@ -202,6 +205,8 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(usb_hc), POWER_SUPPLY_ATTR(usb_otg), POWER_SUPPLY_ATTR(charge_enabled), + /* Local extensions of type int64_t */ + POWER_SUPPLY_ATTR(charge_counter_ext), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 1a7e4f02e3ea..ecfb4cac973c 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * All voltages, currents, charges, energies, time and temperatures in uV, @@ -152,6 +153,8 @@ enum power_supply_property { POWER_SUPPLY_PROP_USB_HC, POWER_SUPPLY_PROP_USB_OTG, POWER_SUPPLY_PROP_CHARGE_ENABLED, + /* Local extensions of type int64_t */ + POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, @@ -179,6 +182,7 @@ enum power_supply_notifier_events { union power_supply_propval { int intval; const char *strval; + int64_t int64val; }; struct device_node; -- GitLab From f118f739be97c23cf735987f5ff11b7b580a502a Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Tue, 14 Oct 2014 17:43:21 -0700 Subject: [PATCH 0914/1442] ANDROID: power: Avoids bogus error messages for the suspend aborts. Avoids printing bogus error message "tasks refusing to freeze", in cases where pending wakeup source caused the suspend abort. Signed-off-by: Ruchi Kandoi Change-Id: I913ad290f501b31cd536d039834c8d24c6f16928 --- kernel/power/process.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index 2fba066e125f..41c6f8af4b9f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -81,26 +81,27 @@ static int try_to_freeze_tasks(bool user_only) elapsed = ktime_sub(end, start); elapsed_msecs = ktime_to_ms(elapsed); - if (todo) { + if (wakeup) { pr_cont("\n"); - pr_err("Freezing of tasks %s after %d.%03d seconds " - "(%d tasks refusing to freeze, wq_busy=%d):\n", - wakeup ? "aborted" : "failed", + pr_err("Freezing of tasks aborted after %d.%03d seconds", + elapsed_msecs / 1000, elapsed_msecs % 1000); + } else if (todo) { + pr_cont("\n"); + pr_err("Freezing of tasks failed after %d.%03d seconds" + " (%d tasks refusing to freeze, wq_busy=%d):\n", elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); if (wq_busy) show_workqueue_state(); - if (!wakeup) { - read_lock(&tasklist_lock); - for_each_process_thread(g, p) { - if (p != current && !freezer_should_skip(p) - && freezing(p) && !frozen(p)) - sched_show_task(p); - } - read_unlock(&tasklist_lock); + read_lock(&tasklist_lock); + for_each_process_thread(g, p) { + if (p != current && !freezer_should_skip(p) + && freezing(p) && !frozen(p)) + sched_show_task(p); } + read_unlock(&tasklist_lock); } else { pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, elapsed_msecs % 1000); -- GitLab From 6118cb49fd4ea0fc83e1e2faad19c86a6be9346e Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Wed, 29 Oct 2014 10:36:27 -0700 Subject: [PATCH 0915/1442] ANDROID: power: Adds functionality to log the last suspend abort reason. Extends the last_resume_reason to log suspend abort reason. The abort reasons will have "Abort:" appended at the start to distinguish itself from the resume reason. Signed-off-by: Ruchi Kandoi Change-Id: I3207f1844e3d87c706dfc298fb10e1c648814c5f --- drivers/base/power/main.c | 5 +++++ drivers/base/power/wakeup.c | 16 +++++++++++++ drivers/base/syscore.c | 3 +++ include/linux/suspend.h | 1 + include/linux/wakeup_reason.h | 4 +++- kernel/power/process.c | 5 +++++ kernel/power/suspend.c | 20 +++++++++++++++-- kernel/power/wakeup_reason.c | 42 ++++++++++++++++++++++++++++------- 8 files changed, 85 insertions(+), 11 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2932a5bd892f..4f991013f682 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "../base.h" #include "power.h" @@ -1353,6 +1354,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; char *info = NULL; int error = 0; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); @@ -1373,6 +1375,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); async_error = -EBUSY; goto Complete; } diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 62e4de2aa8d1..562f21c57193 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -802,6 +802,22 @@ void pm_wakeup_event(struct device *dev, unsigned int msec) } EXPORT_SYMBOL_GPL(pm_wakeup_event); +void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max) +{ + struct wakeup_source *ws; + int len = 0; + rcu_read_lock(); + len += snprintf(pending_wakeup_source, max, "Pending Wakeup Sources: "); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + if (ws->active) { + len += snprintf(pending_wakeup_source + len, max, + "%s ", ws->name); + } + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources); + void pm_print_active_wakeup_sources(void) { struct wakeup_source *ws; diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index 8d98a329f6ea..96c34a95cc62 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -11,6 +11,7 @@ #include #include #include +#include static LIST_HEAD(syscore_ops_list); static DEFINE_MUTEX(syscore_ops_lock); @@ -75,6 +76,8 @@ int syscore_suspend(void) return 0; err_out: + log_suspend_abort_reason("System core suspend callback %pF failed", + ops->suspend); pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend); list_for_each_entry_continue(ops, &syscore_ops_list, node) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d9718378a8be..448321bb8769 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -434,6 +434,7 @@ extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); extern void pm_print_active_wakeup_sources(void); +extern void pm_get_active_wakeup_sources(char *pending_sources, size_t max); static inline void lock_system_sleep(void) { diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index 7ce50f0debc4..5f095da2c977 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -18,6 +18,8 @@ #ifndef _LINUX_WAKEUP_REASON_H #define _LINUX_WAKEUP_REASON_H -void log_wakeup_reason(int irq); +#define MAX_SUSPEND_ABORT_LEN 256 +void log_wakeup_reason(int irq); +void log_suspend_abort_reason(const char *fmt, ...); #endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/kernel/power/process.c b/kernel/power/process.c index 41c6f8af4b9f..8da3cfff682d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * Timeout for stopping processes @@ -34,6 +35,7 @@ static int try_to_freeze_tasks(bool user_only) unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; start = ktime_get_boottime(); @@ -63,6 +65,9 @@ static int try_to_freeze_tasks(bool user_only) break; if (pm_wakeup_pending()) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); wakeup = true; break; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 028d2bee62b0..2d0c99b3f34c 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "power.h" @@ -323,7 +324,8 @@ void __weak arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state, bool *wakeup) { - int error; + char suspend_abort[MAX_SUSPEND_ABORT_LEN]; + int error, last_dev; error = platform_suspend_prepare(state); if (error) @@ -331,7 +333,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) error = dpm_suspend_late(PMSG_SUSPEND); if (error) { + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; pr_err("PM: late suspend of devices failed\n"); + log_suspend_abort_reason("%s device failed to power down", + suspend_stats.failed_devs[last_dev]); goto Platform_finish; } error = platform_suspend_prepare_late(state); @@ -340,7 +346,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) error = dpm_suspend_noirq(PMSG_SUSPEND); if (error) { + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; pr_err("PM: noirq suspend of devices failed\n"); + log_suspend_abort_reason("noirq suspend of %s device failed", + suspend_stats.failed_devs[last_dev]); goto Platform_early_resume; } error = platform_suspend_prepare_noirq(state); @@ -364,8 +374,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) } error = disable_nonboot_cpus(); - if (error || suspend_test(TEST_CPUS)) + if (error || suspend_test(TEST_CPUS)) { + log_suspend_abort_reason("Disabling non-boot cpus failed"); goto Enable_cpus; + } arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -381,6 +393,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) state, false); events_check_enabled = false; } else if (*wakeup) { + pm_get_active_wakeup_sources(suspend_abort, + MAX_SUSPEND_ABORT_LEN); + log_suspend_abort_reason(suspend_abort); error = -EBUSY; } syscore_resume(); @@ -428,6 +443,7 @@ int suspend_devices_and_enter(suspend_state_t state) error = dpm_suspend_start(PMSG_SUSPEND); if (error) { pr_err("PM: Some devices failed to suspend, or early wake event detected\n"); + log_suspend_abort_reason("Some devices failed to suspend, or early wake event detected"); goto Recover_platform; } suspend_test_finish("suspend devices"); diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 187e4e9105fb..2aacc34ef17c 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -31,6 +31,8 @@ #define MAX_WAKEUP_REASON_IRQS 32 static int irq_list[MAX_WAKEUP_REASON_IRQS]; static int irqcount; +static bool suspend_abort; +static char abort_reason[MAX_SUSPEND_ABORT_LEN]; static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; @@ -40,14 +42,18 @@ static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribu int irq_no, buf_offset = 0; struct irq_desc *desc; spin_lock(&resume_reason_lock); - for (irq_no = 0; irq_no < irqcount; irq_no++) { - desc = irq_to_desc(irq_list[irq_no]); - if (desc && desc->action && desc->action->name) - buf_offset += sprintf(buf + buf_offset, "%d %s\n", - irq_list[irq_no], desc->action->name); - else - buf_offset += sprintf(buf + buf_offset, "%d\n", - irq_list[irq_no]); + if (suspend_abort) { + buf_offset = sprintf(buf, "Abort: %s", abort_reason); + } else { + for (irq_no = 0; irq_no < irqcount; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } } spin_unlock(&resume_reason_lock); return buf_offset; @@ -89,6 +95,25 @@ void log_wakeup_reason(int irq) spin_unlock(&resume_reason_lock); } +void log_suspend_abort_reason(const char *fmt, ...) +{ + va_list args; + + spin_lock(&resume_reason_lock); + + //Suspend abort reason has already been logged. + if (suspend_abort) { + spin_unlock(&resume_reason_lock); + return; + } + + suspend_abort = true; + va_start(args, fmt); + snprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); + va_end(args); + spin_unlock(&resume_reason_lock); +} + /* Detects a suspend and clears all the previous wake up reasons*/ static int wakeup_reason_pm_event(struct notifier_block *notifier, unsigned long pm_event, void *unused) @@ -97,6 +122,7 @@ static int wakeup_reason_pm_event(struct notifier_block *notifier, case PM_SUSPEND_PREPARE: spin_lock(&resume_reason_lock); irqcount = 0; + suspend_abort = false; spin_unlock(&resume_reason_lock); break; default: -- GitLab From dfa40333e83a7517a4d76b2d865c1f01aa6ae962 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Fri, 31 Oct 2014 16:05:46 -0700 Subject: [PATCH 0916/1442] ANDROID: power: Add check_wakeup_reason() to verify wakeup source irq Wakeup reason is set before driver resume handlers are called. It is cleared before driver suspend handlers are called, on PM_SUSPEND_PREPARE. Change-Id: I04218c9b0c115a7877e8029c73e6679ff82e0aa4 Signed-off-by: Dmitry Shmidt --- include/linux/wakeup_reason.h | 2 ++ kernel/power/wakeup_reason.c | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index 5f095da2c977..ad8b76936c7f 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -22,4 +22,6 @@ void log_wakeup_reason(int irq); void log_suspend_abort_reason(const char *fmt, ...); +int check_wakeup_reason(int irq); + #endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 2aacc34ef17c..085c99edca06 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -34,7 +34,7 @@ static int irqcount; static bool suspend_abort; static char abort_reason[MAX_SUSPEND_ABORT_LEN]; static struct kobject *wakeup_reason; -static spinlock_t resume_reason_lock; +static DEFINE_SPINLOCK(resume_reason_lock); static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -95,6 +95,21 @@ void log_wakeup_reason(int irq) spin_unlock(&resume_reason_lock); } +int check_wakeup_reason(int irq) +{ + int irq_no; + int ret = false; + + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irqcount; irq_no++) + if (irq_list[irq_no] == irq) { + ret = true; + break; + } + spin_unlock(&resume_reason_lock); + return ret; +} + void log_suspend_abort_reason(const char *fmt, ...) { va_list args; @@ -141,7 +156,7 @@ static struct notifier_block wakeup_reason_pm_notifier_block = { int __init wakeup_reason_init(void) { int retval; - spin_lock_init(&resume_reason_lock); + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); if (retval) printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", -- GitLab From 00a83e61b4fcb126c366fc2fe56b6c29de747014 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 27 Nov 2014 15:12:10 +0900 Subject: [PATCH 0917/1442] ANDROID: Make suspend abort reason logging depend on CONFIG_PM_SLEEP This unbreaks the build on architectures such as um that do not support CONFIG_PM_SLEEP. Change-Id: Ia846ed0a7fca1d762ececad20748d23610e8544f Signed-off-by: Lorenzo Colitti --- kernel/power/process.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/power/process.c b/kernel/power/process.c index 8da3cfff682d..68d27ae215a2 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -35,7 +35,9 @@ static int try_to_freeze_tasks(bool user_only) unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; +#ifdef CONFIG_PM_SLEEP char suspend_abort[MAX_SUSPEND_ABORT_LEN]; +#endif start = ktime_get_boottime(); @@ -65,9 +67,11 @@ static int try_to_freeze_tasks(bool user_only) break; if (pm_wakeup_pending()) { +#ifdef CONFIG_PM_SLEEP pm_get_active_wakeup_sources(suspend_abort, MAX_SUSPEND_ABORT_LEN); log_suspend_abort_reason(suspend_abort); +#endif wakeup = true; break; } -- GitLab From e2cc63fd10df81c559aec99d595fa3323742fb83 Mon Sep 17 00:00:00 2001 From: jinqian Date: Wed, 25 Mar 2015 16:18:44 -0700 Subject: [PATCH 0918/1442] ANDROID: Power: Report suspend times from last_suspend_time This node epxorts two values separated by space. From left to right: 1. time spent in suspend/resume process 2. time spent sleep in suspend state Change-Id: I2cb9a9408a5fd12166aaec11b935a0fd6a408c63 --- .../ABI/testing/sysfs-kernel-wakeup_reasons | 16 +++++++++ kernel/power/wakeup_reason.c | 36 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-kernel-wakeup_reasons diff --git a/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons new file mode 100644 index 000000000000..acb19b91c192 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons @@ -0,0 +1,16 @@ +What: /sys/kernel/wakeup_reasons/last_resume_reason +Date: February 2014 +Contact: Ruchi Kandoi +Description: + The /sys/kernel/wakeup_reasons/last_resume_reason is + used to report wakeup reasons after system exited suspend. + +What: /sys/kernel/wakeup_reasons/last_suspend_time +Date: March 2015 +Contact: jinqian +Description: + The /sys/kernel/wakeup_reasons/last_suspend_time is + used to report time spent in last suspend cycle. It contains + two numbers (in seconds) separated by space. First number is + the time spent in suspend and resume processes. Second number + is the time spent in sleep state. \ No newline at end of file diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 085c99edca06..76b53400cb16 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -36,6 +36,11 @@ static char abort_reason[MAX_SUSPEND_ABORT_LEN]; static struct kobject *wakeup_reason; static DEFINE_SPINLOCK(resume_reason_lock); +static struct timespec last_xtime; /* wall time before last suspend */ +static struct timespec curr_xtime; /* wall time after last suspend */ +static struct timespec last_stime; /* total_sleep_time before last suspend */ +static struct timespec curr_stime; /* total_sleep_time after last suspend */ + static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -59,10 +64,32 @@ static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribu return buf_offset; } +static ssize_t last_suspend_time_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct timespec sleep_time; + struct timespec total_time; + struct timespec suspend_resume_time; + + sleep_time = timespec_sub(curr_stime, last_stime); + total_time = timespec_sub(curr_xtime, last_xtime); + suspend_resume_time = timespec_sub(total_time, sleep_time); + + /* + * suspend_resume_time is calculated from sleep_time. Userspace would + * always need both. Export them in pair here. + */ + return sprintf(buf, "%lu.%09lu %lu.%09lu\n", + suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec, + sleep_time.tv_sec, sleep_time.tv_nsec); +} + static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); +static struct kobj_attribute suspend_time = __ATTR_RO(last_suspend_time); static struct attribute *attrs[] = { &resume_reason.attr, + &suspend_time.attr, NULL, }; static struct attribute_group attr_group = { @@ -133,12 +160,21 @@ void log_suspend_abort_reason(const char *fmt, ...) static int wakeup_reason_pm_event(struct notifier_block *notifier, unsigned long pm_event, void *unused) { + struct timespec xtom; /* wall_to_monotonic, ignored */ + switch (pm_event) { case PM_SUSPEND_PREPARE: spin_lock(&resume_reason_lock); irqcount = 0; suspend_abort = false; spin_unlock(&resume_reason_lock); + + get_xtime_and_monotonic_and_sleep_offset(&last_xtime, &xtom, + &last_stime); + break; + case PM_POST_SUSPEND: + get_xtime_and_monotonic_and_sleep_offset(&curr_xtime, &xtom, + &curr_stime); break; default: break; -- GitLab From 68b6254bb40c8fb78ef1b0db4e6c173d50daaeac Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Wed, 8 Apr 2015 15:42:29 -0700 Subject: [PATCH 0919/1442] ANDROID: wakeup: Add last wake up source logging for suspend abort reason. There is a possibility that a wakeup source event is received after the device prepares to suspend which might cause the suspend to abort. This patch adds the functionality of reporting the last active wakeup source which is currently not active but caused the suspend to abort reason via the /sys/kernel/power/last_wakeup_reason file. Change-Id: I1760d462f497b33e425f5565cb6cff5973932ec3 Signed-off-by: Ruchi Kandoi --- drivers/base/power/wakeup.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 562f21c57193..e899fdafcd74 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "power.h" @@ -804,16 +805,31 @@ EXPORT_SYMBOL_GPL(pm_wakeup_event); void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max) { - struct wakeup_source *ws; + struct wakeup_source *ws, *last_active_ws = NULL; int len = 0; + bool active = false; + rcu_read_lock(); - len += snprintf(pending_wakeup_source, max, "Pending Wakeup Sources: "); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->active) { - len += snprintf(pending_wakeup_source + len, max, + if (!active) + len += scnprintf(pending_wakeup_source, max, + "Pending Wakeup Sources: "); + len += scnprintf(pending_wakeup_source + len, max - len, "%s ", ws->name); + active = true; + } else if (!active && + (!last_active_ws || + ktime_to_ns(ws->last_time) > + ktime_to_ns(last_active_ws->last_time))) { + last_active_ws = ws; } } + if (!active && last_active_ws) { + scnprintf(pending_wakeup_source, max, + "Last active Wakeup Source: %s", + last_active_ws->name); + } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources); -- GitLab From 7df92a19af9343a76100235bd76437afd9077607 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 14 Apr 2015 02:38:20 +0530 Subject: [PATCH 0920/1442] ANDROID: power: wakeup_reason: fix suspend time reporting Suspend time reporting Change-Id: I2cb9a9408a5fd12166aaec11b935a0fd6a408c63 (Power: Report suspend times from last_suspend_time), is broken on 3.16+ kernels because get_xtime_and_monotonic_and_sleep_offset() hrtimer helper routine is removed from kernel timekeeping. The replacement helper routines ktime_get_update_offsets_{tick,now}() are private to core kernel timekeeping so we can't use them, hence using ktime_get() and ktime_get_boottime() instead and sampling the time twice. Idea is to use Monotonic boottime offset to calculate total time spent in last suspend state and CLOCK_MONOTONIC to calculate time spent in last suspend-resume process. Signed-off-by: Amit Pundir --- kernel/power/wakeup_reason.c | 41 ++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 76b53400cb16..21787ebb332f 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -36,10 +36,10 @@ static char abort_reason[MAX_SUSPEND_ABORT_LEN]; static struct kobject *wakeup_reason; static DEFINE_SPINLOCK(resume_reason_lock); -static struct timespec last_xtime; /* wall time before last suspend */ -static struct timespec curr_xtime; /* wall time after last suspend */ -static struct timespec last_stime; /* total_sleep_time before last suspend */ -static struct timespec curr_stime; /* total_sleep_time after last suspend */ +static ktime_t last_monotime; /* monotonic time before last suspend */ +static ktime_t curr_monotime; /* monotonic time after last suspend */ +static ktime_t last_stime; /* monotonic boottime offset before last suspend */ +static ktime_t curr_stime; /* monotonic boottime offset after last suspend */ static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -71,14 +71,22 @@ static ssize_t last_suspend_time_show(struct kobject *kobj, struct timespec total_time; struct timespec suspend_resume_time; - sleep_time = timespec_sub(curr_stime, last_stime); - total_time = timespec_sub(curr_xtime, last_xtime); - suspend_resume_time = timespec_sub(total_time, sleep_time); + /* + * total_time is calculated from monotonic bootoffsets because + * unlike CLOCK_MONOTONIC it include the time spent in suspend state. + */ + total_time = ktime_to_timespec(ktime_sub(curr_stime, last_stime)); /* - * suspend_resume_time is calculated from sleep_time. Userspace would - * always need both. Export them in pair here. + * suspend_resume_time is calculated as monotonic (CLOCK_MONOTONIC) + * time interval before entering suspend and post suspend. */ + suspend_resume_time = ktime_to_timespec(ktime_sub(curr_monotime, last_monotime)); + + /* sleep_time = total_time - suspend_resume_time */ + sleep_time = timespec_sub(total_time, suspend_resume_time); + + /* Export suspend_resume_time and sleep_time in pair here. */ return sprintf(buf, "%lu.%09lu %lu.%09lu\n", suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec, sleep_time.tv_sec, sleep_time.tv_nsec); @@ -160,21 +168,22 @@ void log_suspend_abort_reason(const char *fmt, ...) static int wakeup_reason_pm_event(struct notifier_block *notifier, unsigned long pm_event, void *unused) { - struct timespec xtom; /* wall_to_monotonic, ignored */ - switch (pm_event) { case PM_SUSPEND_PREPARE: spin_lock(&resume_reason_lock); irqcount = 0; suspend_abort = false; spin_unlock(&resume_reason_lock); - - get_xtime_and_monotonic_and_sleep_offset(&last_xtime, &xtom, - &last_stime); + /* monotonic time since boot */ + last_monotime = ktime_get(); + /* monotonic time since boot including the time spent in suspend */ + last_stime = ktime_get_boottime(); break; case PM_POST_SUSPEND: - get_xtime_and_monotonic_and_sleep_offset(&curr_xtime, &xtom, - &curr_stime); + /* monotonic time since boot */ + curr_monotime = ktime_get(); + /* monotonic time since boot including the time spent in suspend */ + curr_stime = ktime_get_boottime(); break; default: break; -- GitLab From 9d17e24b036e49dcdf070123d81444812684c026 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Wed, 5 Aug 2015 16:54:53 -0700 Subject: [PATCH 0921/1442] ANDROID: wakeup_reason: use vsnprintf instead of snsprintf for vargs. Bug: 22368519 Signed-off-by: Ruchi Kandoi --- kernel/power/wakeup_reason.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 21787ebb332f..252611fad2fe 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -159,7 +159,7 @@ void log_suspend_abort_reason(const char *fmt, ...) suspend_abort = true; va_start(args, fmt); - snprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); + vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args); va_end(args); spin_unlock(&resume_reason_lock); } -- GitLab From ff8b80819cf4d76ff7fdfeb85d35f28f916105bd Mon Sep 17 00:00:00 2001 From: San Mehat Date: Wed, 16 Sep 2009 12:39:10 -0700 Subject: [PATCH 0922/1442] ANDROID: proc: smaps: Allow smaps access for CAP_SYS_RESOURCE Signed-off-by: San Mehat --- kernel/fork.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 997ac1d584f7..27e9af69a742 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1007,7 +1007,8 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) mm = get_task_mm(task); if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { + !ptrace_may_access(task, mode) && + !capable(CAP_SYS_RESOURCE)) { mmput(mm); mm = ERR_PTR(-EACCES); } -- GitLab From 2956c9be7e5a8a2a89e790e20d2953b490571a4c Mon Sep 17 00:00:00 2001 From: Rom Lemarchand Date: Sat, 7 Mar 2015 09:38:05 -0800 Subject: [PATCH 0923/1442] ANDROID: proc: make oom adjustment files user read-only Make oom_adj and oom_score_adj user read-only. Bug: 19636629 Change-Id: I055bb172d5b4d3d856e25918f3c5de8edf31e4a3 Signed-off-by: Rom Lemarchand --- fs/proc/base.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index ca651ac00660..77b8a1e163d9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2912,8 +2912,9 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), @@ -3301,8 +3302,8 @@ static const struct pid_entry tid_base_stuff[] = { ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), -- GitLab From f049c419aee1ad45a55f30c94e6f3b5677419a75 Mon Sep 17 00:00:00 2001 From: Dan Willemsen Date: Wed, 18 Mar 2015 11:22:44 -0700 Subject: [PATCH 0924/1442] ANDROID: fixup! proc: make oom adjustment files user read-only Fix the build by removing the duplicate line that uses the obsolete INF macro. Signed-off-by: Dan Willemsen --- fs/proc/base.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 77b8a1e163d9..c01eeaade636 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2912,7 +2912,6 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), - INF("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL -- GitLab From 690142232628017555ea7eb26e9d782673f37cc1 Mon Sep 17 00:00:00 2001 From: jinqian Date: Wed, 11 Mar 2015 10:44:50 -0700 Subject: [PATCH 0925/1442] ANDROID: proc: uid: Adds accounting for the cputimes per uid. Adds proc files /proc/uid_cputime/show_uid_stat and /proc/uid_cputime/remove_uid_range. show_uid_stat lists the total utime and stime for the active as well as terminated processes for each of the uids. Writing a range of uids to remove_uid_range will delete the accounting for all the uids within that range. Change-Id: I21d9210379da730b33ddc1a0ea663c8c9d2ac15b --- drivers/misc/Kconfig | 6 + drivers/misc/Makefile | 2 + drivers/misc/uid_cputime.c | 235 +++++++++++++++++++++++++++++++++++++ 3 files changed, 243 insertions(+) create mode 100644 drivers/misc/uid_cputime.c diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 64971baf11fa..1e3e5b88264f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -766,6 +766,12 @@ config PANEL_BOOT_MESSAGE An empty message will only clear the display at driver init time. Any other printf()-formatted message is valid with newline and escape codes. +config UID_CPUTIME + tristate "Per-UID cpu time statistics" + depends on PROFILING + help + Per UID based cpu time statistics exported to /proc/uid_cputime + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 31983366090a..3ac6e9537891 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -54,6 +54,8 @@ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_PANEL) += panel.o +obj-$(CONFIG_UID_CPUTIME) += uid_cputime.o + lkdtm-$(CONFIG_LKDTM) += lkdtm_core.o lkdtm-$(CONFIG_LKDTM) += lkdtm_bugs.o lkdtm-$(CONFIG_LKDTM) += lkdtm_heap.o diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c new file mode 100644 index 000000000000..cb26e3c118b1 --- /dev/null +++ b/drivers/misc/uid_cputime.c @@ -0,0 +1,235 @@ +/* drivers/misc/uid_cputime.c + * + * Copyright (C) 2014 - 2015 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UID_HASH_BITS 10 +DECLARE_HASHTABLE(hash_table, UID_HASH_BITS); + +static DEFINE_MUTEX(uid_lock); +static struct proc_dir_entry *parent; + +struct uid_entry { + uid_t uid; + cputime_t utime; + cputime_t stime; + cputime_t active_utime; + cputime_t active_stime; + struct hlist_node hash; +}; + +static struct uid_entry *find_uid_entry(uid_t uid) +{ + struct uid_entry *uid_entry; + hash_for_each_possible(hash_table, uid_entry, hash, uid) { + if (uid_entry->uid == uid) + return uid_entry; + } + return NULL; +} + +static struct uid_entry *find_or_register_uid(uid_t uid) +{ + struct uid_entry *uid_entry; + + uid_entry = find_uid_entry(uid); + if (uid_entry) + return uid_entry; + + uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC); + if (!uid_entry) + return NULL; + + uid_entry->uid = uid; + + hash_add(hash_table, &uid_entry->hash, uid); + + return uid_entry; +} + +static int uid_stat_show(struct seq_file *m, void *v) +{ + struct uid_entry *uid_entry; + struct task_struct *task; + cputime_t utime; + cputime_t stime; + unsigned long bkt; + + mutex_lock(&uid_lock); + + hash_for_each(hash_table, bkt, uid_entry, hash) { + uid_entry->active_stime = 0; + uid_entry->active_utime = 0; + } + + read_lock(&tasklist_lock); + for_each_process(task) { + uid_entry = find_or_register_uid(task_uid(task)); + if (!uid_entry) { + read_unlock(&tasklist_lock); + mutex_unlock(&uid_lock); + pr_err("%s: failed to find the uid_entry for uid %d\n", + __func__, task_uid(task)); + return -ENOMEM; + } + task_cputime_adjusted(task, &utime, &stime); + uid_entry->active_utime += utime; + uid_entry->active_stime += stime; + } + read_unlock(&tasklist_lock); + + hash_for_each(hash_table, bkt, uid_entry, hash) { + cputime_t total_utime = uid_entry->utime + + uid_entry->active_utime; + cputime_t total_stime = uid_entry->stime + + uid_entry->active_stime; + seq_printf(m, "%d: %u %u\n", uid_entry->uid, + cputime_to_usecs(total_utime), + cputime_to_usecs(total_stime)); + } + + mutex_unlock(&uid_lock); + return 0; +} + +static int uid_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, uid_stat_show, PDE_DATA(inode)); +} + +static const struct file_operations uid_stat_fops = { + .open = uid_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uid_remove_open(struct inode *inode, struct file *file) +{ + return single_open(file, NULL, NULL); +} + +static ssize_t uid_remove_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) +{ + struct uid_entry *uid_entry; + struct hlist_node *tmp; + char uids[128]; + char *start_uid, *end_uid = NULL; + long int uid_start = 0, uid_end = 0; + + if (count >= sizeof(uids)) + count = sizeof(uids) - 1; + + if (copy_from_user(uids, buffer, count)) + return -EFAULT; + + uids[count] = '\0'; + end_uid = uids; + start_uid = strsep(&end_uid, "-"); + + if (!start_uid || !end_uid) + return -EINVAL; + + if (kstrtol(start_uid, 10, &uid_start) != 0 || + kstrtol(end_uid, 10, &uid_end) != 0) { + return -EINVAL; + } + + mutex_lock(&uid_lock); + + for (; uid_start <= uid_end; uid_start++) { + hash_for_each_possible_safe(hash_table, uid_entry, tmp, + hash, uid_start) { + hash_del(&uid_entry->hash); + kfree(uid_entry); + } + } + + mutex_unlock(&uid_lock); + return count; +} + +static const struct file_operations uid_remove_fops = { + .open = uid_remove_open, + .release = single_release, + .write = uid_remove_write, +}; + +static int process_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + struct task_struct *task = v; + struct uid_entry *uid_entry; + cputime_t utime, stime; + uid_t uid; + + if (!task) + return NOTIFY_OK; + + mutex_lock(&uid_lock); + uid = task_uid(task); + uid_entry = find_or_register_uid(uid); + if (!uid_entry) { + pr_err("%s: failed to find uid %d\n", __func__, uid); + goto exit; + } + + task_cputime_adjusted(task, &utime, &stime); + uid_entry->utime += utime; + uid_entry->stime += stime; + +exit: + mutex_unlock(&uid_lock); + return NOTIFY_OK; +} + +static struct notifier_block process_notifier_block = { + .notifier_call = process_notifier, +}; + +static int __init proc_uid_cputime_init(void) +{ + hash_init(hash_table); + + parent = proc_mkdir("uid_cputime", NULL); + if (!parent) { + pr_err("%s: failed to create proc entry\n", __func__); + return -ENOMEM; + } + + proc_create_data("remove_uid_range", S_IWUGO, parent, &uid_remove_fops, + NULL); + + proc_create_data("show_uid_stat", S_IWUGO, parent, &uid_stat_fops, + NULL); + + profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block); + + return 0; +} + +early_initcall(proc_uid_cputime_init); -- GitLab From 48a9906c0fd8cd7098cc793ff57e5beef3abb6d1 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Wed, 15 Apr 2015 00:40:21 +0530 Subject: [PATCH 0926/1442] ANDROID: proc: uid_cputime: create uids from kuids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create uids from kuids using from_kuid_munged(), otherwise we run into following build error and warnings: -------------------- CC drivers/misc/uid_cputime.o drivers/misc/uid_cputime.c: In function ‘uid_stat_show’: drivers/misc/uid_cputime.c:90:36: error: incompatible type for argument 1 of ‘find_or_register_uid’ drivers/misc/uid_cputime.c:54:26: note: expected ‘uid_t’ but argument is of type ‘kuid_t’ drivers/misc/uid_cputime.c:94:4: warning: format ‘%d’ expects argument of type ‘int’, but argument 3 has type ‘kuid_t’ [-Wformat] drivers/misc/uid_cputime.c: In function ‘process_notifier’: drivers/misc/uid_cputime.c:194:6: error: incompatible types when assigning to type ‘uid_t’ from type ‘kuid_t’ make[2]: *** [drivers/misc/uid_cputime.o] Error 1 -------------------- Change-Id: Ifecb98001f7fe2fac74d1ef3e1abd03d43fc9059 Signed-off-by: Amit Pundir (cherry picked from commit b0f4decae627cf2d74e6f72c7ecb939c77d48625) --- drivers/misc/uid_cputime.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c index cb26e3c118b1..acd7046ce497 100644 --- a/drivers/misc/uid_cputime.c +++ b/drivers/misc/uid_cputime.c @@ -87,12 +87,14 @@ static int uid_stat_show(struct seq_file *m, void *v) read_lock(&tasklist_lock); for_each_process(task) { - uid_entry = find_or_register_uid(task_uid(task)); + uid_entry = find_or_register_uid(from_kuid_munged( + current_user_ns(), task_uid(task))); if (!uid_entry) { read_unlock(&tasklist_lock); mutex_unlock(&uid_lock); pr_err("%s: failed to find the uid_entry for uid %d\n", - __func__, task_uid(task)); + __func__, from_kuid_munged(current_user_ns(), + task_uid(task))); return -ENOMEM; } task_cputime_adjusted(task, &utime, &stime); @@ -191,7 +193,7 @@ static int process_notifier(struct notifier_block *self, return NOTIFY_OK; mutex_lock(&uid_lock); - uid = task_uid(task); + uid = from_kuid_munged(current_user_ns(), task_uid(task)); uid_entry = find_or_register_uid(uid); if (!uid_entry) { pr_err("%s: failed to find uid %d\n", __func__, uid); -- GitLab From 453ac31cab34ddadf2cb91cc6cb15e56dd27c749 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Mon, 11 May 2015 17:57:52 -0700 Subject: [PATCH 0927/1442] ANDROID: proc: uid_cputime: fix show_uid_stat permission Change-Id: Ice9084e39da599261df0be6dc305b817b50cfbbf Signed-off-by: Jin Qian --- drivers/misc/uid_cputime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c index acd7046ce497..c3f5bda9241d 100644 --- a/drivers/misc/uid_cputime.c +++ b/drivers/misc/uid_cputime.c @@ -226,7 +226,7 @@ static int __init proc_uid_cputime_init(void) proc_create_data("remove_uid_range", S_IWUGO, parent, &uid_remove_fops, NULL); - proc_create_data("show_uid_stat", S_IWUGO, parent, &uid_stat_fops, + proc_create_data("show_uid_stat", S_IRUGO, parent, &uid_stat_fops, NULL); profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block); -- GitLab From bf7cb027713ace71d8b6c909c02133f496b5cbfb Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 17 Apr 2015 16:52:54 -0700 Subject: [PATCH 0928/1442] ANDROID: uid_cputime: Extends the cputime functionality to report power per uid /proc/uid_cputime/show_uid_stats shows a third field power for each of the uids. It represents the power in the units (uAusec) Change-Id: I52fdc5e59647e9dc97561a26d56f462a2689ba9c Signed-off-by: Ruchi Kandoi --- drivers/misc/uid_cputime.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c index c3f5bda9241d..89bfba6c5b6a 100644 --- a/drivers/misc/uid_cputime.c +++ b/drivers/misc/uid_cputime.c @@ -38,6 +38,8 @@ struct uid_entry { cputime_t stime; cputime_t active_utime; cputime_t active_stime; + unsigned long long active_power; + unsigned long long power; struct hlist_node hash; }; @@ -83,6 +85,7 @@ static int uid_stat_show(struct seq_file *m, void *v) hash_for_each(hash_table, bkt, uid_entry, hash) { uid_entry->active_stime = 0; uid_entry->active_utime = 0; + uid_entry->active_power = 0; } read_lock(&tasklist_lock); @@ -100,6 +103,7 @@ static int uid_stat_show(struct seq_file *m, void *v) task_cputime_adjusted(task, &utime, &stime); uid_entry->active_utime += utime; uid_entry->active_stime += stime; + uid_entry->active_power += task->cpu_power; } read_unlock(&tasklist_lock); @@ -108,9 +112,12 @@ static int uid_stat_show(struct seq_file *m, void *v) uid_entry->active_utime; cputime_t total_stime = uid_entry->stime + uid_entry->active_stime; - seq_printf(m, "%d: %u %u\n", uid_entry->uid, + unsigned long long total_power = uid_entry->power + + uid_entry->active_power; + seq_printf(m, "%d: %u %u %llu\n", uid_entry->uid, cputime_to_usecs(total_utime), - cputime_to_usecs(total_stime)); + cputime_to_usecs(total_stime), + total_power); } mutex_unlock(&uid_lock); @@ -203,6 +210,7 @@ static int process_notifier(struct notifier_block *self, task_cputime_adjusted(task, &utime, &stime); uid_entry->utime += utime; uid_entry->stime += stime; + uid_entry->power += task->cpu_power; exit: mutex_unlock(&uid_lock); -- GitLab From 40b78cc953b2bbe54e5e08504c0daa2a62071f09 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 26 Jun 2015 14:19:21 -0700 Subject: [PATCH 0929/1442] ANDROID: uid_cputime: Avoids double accounting of process stime, utime and cpu_power in task exit. This avoids the race where a particular process is terminating and we read the show_uid_stats. At this time since the task_struct still exists and we will account for the terminating process as one of the active task, where as the stats would have been added in the task exit callback. Bug: 22064385 Change-Id: Id2ae04b33fcd230eda9683a41b6019d4dd8f5d85 Signed-off-by: Jin Qian Signed-off-by: Ruchi Kandoi --- drivers/misc/uid_cputime.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c index 89bfba6c5b6a..012bd3505ed5 100644 --- a/drivers/misc/uid_cputime.c +++ b/drivers/misc/uid_cputime.c @@ -100,6 +100,11 @@ static int uid_stat_show(struct seq_file *m, void *v) task_uid(task))); return -ENOMEM; } + /* if this task is exiting, we have already accounted for the + * time and power. + */ + if (task->cpu_power == ULLONG_MAX) + continue; task_cputime_adjusted(task, &utime, &stime); uid_entry->active_utime += utime; uid_entry->active_stime += stime; @@ -211,6 +216,7 @@ static int process_notifier(struct notifier_block *self, uid_entry->utime += utime; uid_entry->stime += stime; uid_entry->power += task->cpu_power; + task->cpu_power = ULLONG_MAX; exit: mutex_unlock(&uid_lock); -- GitLab From be7074f15834d86bf8be6064e77112428cd963c5 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Mon, 13 Jul 2015 18:16:55 -0700 Subject: [PATCH 0930/1442] ANDROID: uid_cputime: fix cputime overflow Converting cputime_t to usec caused overflow when the value is greater than 1 hour. Use msec and convert to unsigned long long to support bigger range. Bug: 22461683 Change-Id: I853fe3e8e7dbf0d3e2cc5c6f9688a5a6e1f1fb3e Signed-off-by: Jin Qian --- drivers/misc/uid_cputime.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c index 012bd3505ed5..75843985c0e8 100644 --- a/drivers/misc/uid_cputime.c +++ b/drivers/misc/uid_cputime.c @@ -119,10 +119,12 @@ static int uid_stat_show(struct seq_file *m, void *v) uid_entry->active_stime; unsigned long long total_power = uid_entry->power + uid_entry->active_power; - seq_printf(m, "%d: %u %u %llu\n", uid_entry->uid, - cputime_to_usecs(total_utime), - cputime_to_usecs(total_stime), - total_power); + seq_printf(m, "%d: %llu %llu %llu\n", uid_entry->uid, + (unsigned long long)jiffies_to_msecs( + cputime_to_jiffies(total_utime)) * USEC_PER_MSEC, + (unsigned long long)jiffies_to_msecs( + cputime_to_jiffies(total_stime)) * USEC_PER_MSEC, + total_power); } mutex_unlock(&uid_lock); -- GitLab From 0a733775d3424677ab44d01ad2bd1d8b8faa559b Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 31 Jul 2015 10:17:54 -0700 Subject: [PATCH 0931/1442] ANDROID: uid_cputime: Iterates over all the threads instead of processes. Bug: 22833116 Change-Id: I775a18f61bd2f4df2bec23d01bd49421d0969f87 Signed-off-by: Ruchi Kandoi --- drivers/misc/uid_cputime.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c index 75843985c0e8..43298a43ecc3 100644 --- a/drivers/misc/uid_cputime.c +++ b/drivers/misc/uid_cputime.c @@ -75,7 +75,7 @@ static struct uid_entry *find_or_register_uid(uid_t uid) static int uid_stat_show(struct seq_file *m, void *v) { struct uid_entry *uid_entry; - struct task_struct *task; + struct task_struct *task, *temp; cputime_t utime; cputime_t stime; unsigned long bkt; @@ -89,7 +89,7 @@ static int uid_stat_show(struct seq_file *m, void *v) } read_lock(&tasklist_lock); - for_each_process(task) { + do_each_thread(temp, task) { uid_entry = find_or_register_uid(from_kuid_munged( current_user_ns(), task_uid(task))); if (!uid_entry) { @@ -109,7 +109,7 @@ static int uid_stat_show(struct seq_file *m, void *v) uid_entry->active_utime += utime; uid_entry->active_stime += stime; uid_entry->active_power += task->cpu_power; - } + } while_each_thread(temp, task); read_unlock(&tasklist_lock); hash_for_each(hash_table, bkt, uid_entry, hash) { -- GitLab From 17f35eacb42c79b18ab3f75dc111856fe1a0f204 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 23 Oct 2015 17:49:11 -0700 Subject: [PATCH 0932/1442] ANDROID: uid_cputime: Check for the range while removing range of UIDs. Checking if the uid_entry->uid matches the uid intended to be removed will prevent deleting unwanted uid_entry. Type cast the key for the hashtable to the same size, as when they were inserted. This will make sure that we can find the uid_entry we want. Bug: 25195548 Change-Id: I567942123cfb20e4b61ad624da19ec4cc84642c1 Signed-off: Ruchi kandoi --- drivers/misc/uid_cputime.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c index 43298a43ecc3..c751188ce2ab 100644 --- a/drivers/misc/uid_cputime.c +++ b/drivers/misc/uid_cputime.c @@ -174,14 +174,15 @@ static ssize_t uid_remove_write(struct file *file, kstrtol(end_uid, 10, &uid_end) != 0) { return -EINVAL; } - mutex_lock(&uid_lock); for (; uid_start <= uid_end; uid_start++) { hash_for_each_possible_safe(hash_table, uid_entry, tmp, - hash, uid_start) { - hash_del(&uid_entry->hash); - kfree(uid_entry); + hash, (uid_t)uid_start) { + if (uid_start == uid_entry->uid) { + hash_del(&uid_entry->hash); + kfree(uid_entry); + } } } -- GitLab From e4395b2da362cd0375fcb918927200822124a8cd Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 14 Dec 2015 11:56:35 +0530 Subject: [PATCH 0933/1442] ANDROID: uid_cputime: skip power reporting per uid for now AOSP's cpufreq_stats patch to report power/current per cpufreq is broken at the moment so skip power reporting for now. Change-Id: I07779511f51ff3a9303dc98a3b71ac0a5882a4e9 Signed-off-by: Amit Pundir --- drivers/misc/uid_cputime.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c index c751188ce2ab..c1ad5246f564 100644 --- a/drivers/misc/uid_cputime.c +++ b/drivers/misc/uid_cputime.c @@ -38,8 +38,6 @@ struct uid_entry { cputime_t stime; cputime_t active_utime; cputime_t active_stime; - unsigned long long active_power; - unsigned long long power; struct hlist_node hash; }; @@ -85,7 +83,6 @@ static int uid_stat_show(struct seq_file *m, void *v) hash_for_each(hash_table, bkt, uid_entry, hash) { uid_entry->active_stime = 0; uid_entry->active_utime = 0; - uid_entry->active_power = 0; } read_lock(&tasklist_lock); @@ -100,15 +97,9 @@ static int uid_stat_show(struct seq_file *m, void *v) task_uid(task))); return -ENOMEM; } - /* if this task is exiting, we have already accounted for the - * time and power. - */ - if (task->cpu_power == ULLONG_MAX) - continue; task_cputime_adjusted(task, &utime, &stime); uid_entry->active_utime += utime; uid_entry->active_stime += stime; - uid_entry->active_power += task->cpu_power; } while_each_thread(temp, task); read_unlock(&tasklist_lock); @@ -117,14 +108,11 @@ static int uid_stat_show(struct seq_file *m, void *v) uid_entry->active_utime; cputime_t total_stime = uid_entry->stime + uid_entry->active_stime; - unsigned long long total_power = uid_entry->power + - uid_entry->active_power; - seq_printf(m, "%d: %llu %llu %llu\n", uid_entry->uid, + seq_printf(m, "%d: %llu %llu\n", uid_entry->uid, (unsigned long long)jiffies_to_msecs( cputime_to_jiffies(total_utime)) * USEC_PER_MSEC, (unsigned long long)jiffies_to_msecs( - cputime_to_jiffies(total_stime)) * USEC_PER_MSEC, - total_power); + cputime_to_jiffies(total_stime)) * USEC_PER_MSEC); } mutex_unlock(&uid_lock); @@ -218,8 +206,6 @@ static int process_notifier(struct notifier_block *self, task_cputime_adjusted(task, &utime, &stime); uid_entry->utime += utime; uid_entry->stime += stime; - uid_entry->power += task->cpu_power; - task->cpu_power = ULLONG_MAX; exit: mutex_unlock(&uid_lock); -- GitLab From 122dd3c680b075c8a177462b1d813385a9c76a9b Mon Sep 17 00:00:00 2001 From: Tushar Behera Date: Mon, 26 Mar 2012 16:54:15 +0530 Subject: [PATCH 0934/1442] ANDROID: security: Add proper checks for Android specific capability checks Commit b641072 ("security: Add AID_NET_RAW and AID_NET_ADMIN capability check in cap_capable().") introduces additional checks for AID_NET_xxx macros. Since the header file including those macros are conditionally included, the checks should also be conditionally executed. Change-Id: Iaec5208d5b95a46b1ac3f2db8449c661e803fa5b Signed-off-by: Tushar Behera Signed-off-by: Andrey Konovalov --- security/commoncap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/commoncap.c b/security/commoncap.c index 8816e499335e..a8e4aacf0b0c 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -77,10 +77,12 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, { struct user_namespace *ns = targ_ns; +#ifdef CONFIG_ANDROID_PARANOID_NETWORK if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW)) return 0; if (cap == CAP_NET_ADMIN && in_egroup_p(AID_NET_ADMIN)) return 0; +#endif /* See if cred has the capability in the target user namespace * by examining the target user namespace and all of the target -- GitLab From c6023aa6b8437c36b18cdb1a8d11b7d57d9375f7 Mon Sep 17 00:00:00 2001 From: San Mehat Date: Tue, 25 Aug 2009 16:52:22 -0700 Subject: [PATCH 0935/1442] ANDROID: process: Add display of memory around registers when displaying regs. This is extremely useful in diagnosing remote crashes, and is based heavily on original work by . Signed-off-by: San Mehat Cc: Michael Davidson [ARM] process: Use uber-safe probe_kernel_address() to read mem when dumping. This prevents the dump from taking pagefaults / external aborts. Change-Id: I8df76e8638780f94fb1bd7ea4471e3f7b01df950 Signed-off-by: San Mehat --- arch/arm/kernel/process.c | 73 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index aae93b483f86..c6324b534b9d 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -93,6 +93,77 @@ void arch_cpu_idle_exit(void) idle_notifier_call_chain(IDLE_END); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->ARM_pc - nbytes, nbytes * 2, "PC"); + show_data(regs->ARM_lr - nbytes, nbytes * 2, "LR"); + show_data(regs->ARM_sp - nbytes, nbytes * 2, "SP"); + show_data(regs->ARM_ip - nbytes, nbytes * 2, "IP"); + show_data(regs->ARM_fp - nbytes, nbytes * 2, "FP"); + show_data(regs->ARM_r0 - nbytes, nbytes * 2, "R0"); + show_data(regs->ARM_r1 - nbytes, nbytes * 2, "R1"); + show_data(regs->ARM_r2 - nbytes, nbytes * 2, "R2"); + show_data(regs->ARM_r3 - nbytes, nbytes * 2, "R3"); + show_data(regs->ARM_r4 - nbytes, nbytes * 2, "R4"); + show_data(regs->ARM_r5 - nbytes, nbytes * 2, "R5"); + show_data(regs->ARM_r6 - nbytes, nbytes * 2, "R6"); + show_data(regs->ARM_r7 - nbytes, nbytes * 2, "R7"); + show_data(regs->ARM_r8 - nbytes, nbytes * 2, "R8"); + show_data(regs->ARM_r9 - nbytes, nbytes * 2, "R9"); + show_data(regs->ARM_r10 - nbytes, nbytes * 2, "R10"); + set_fs(fs); +} + void __show_regs(struct pt_regs *regs) { unsigned long flags; @@ -184,6 +255,8 @@ void __show_regs(struct pt_regs *regs) printk("Control: %08x%s\n", ctrl, buf); } #endif + + show_extra_register_data(regs, 128); } void show_regs(struct pt_regs * regs) -- GitLab From 6d6ef30ebec1f8440138813f99249b9640d14f6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Tue, 9 Jun 2009 20:17:45 -0700 Subject: [PATCH 0936/1442] ANDROID: Optionally flush entire dcache from v6_dma_flush_range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CACHE_FLUSH_RANGE_LIMIT is defined, then the entire dcache will be flushed if the requested range is larger than this limit. Change-Id: I29277d645a9d6716b1952cf3b870c78496261dd0 Signed-off-by: Arve Hjønnevåg --- arch/arm/mm/cache-v6.S | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 24659952c278..11da0f50a1fe 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -270,6 +270,11 @@ v6_dma_clean_range: * - end - virtual end address of region */ ENTRY(v6_dma_flush_range) +#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT + sub r2, r1, r0 + cmp r2, #CONFIG_CACHE_FLUSH_RANGE_LIMIT + bhi v6_dma_flush_dcache_all +#endif #ifdef CONFIG_DMA_CACHE_RWFO ldrb r2, [r0] @ read for ownership strb r2, [r0] @ write for ownership @@ -292,6 +297,18 @@ ENTRY(v6_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain write buffer ret lr +#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT +v6_dma_flush_dcache_all: + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr +#endif + /* * dma_map_area(start, size, dir) * - start - kernel virtual start address -- GitLab From f9deaf1b9867cf2b91fcfbfc226e8721c593d5d5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 9 May 2005 14:10:26 -0700 Subject: [PATCH 0937/1442] ANDROID: ARM: Make low-level printk work Makes low-level printk work. Signed-off-by: Tony Lindgren --- kernel/printk/printk.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f7a55e9ff2f7..d4c86643258d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -56,6 +56,10 @@ #include "braille.h" #include "internal.h" +#ifdef CONFIG_DEBUG_LL +extern void printascii(char *); +#endif + int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ @@ -1842,6 +1846,10 @@ asmlinkage int vprintk_emit(int facility, int level, */ text_len = vscnprintf(text, sizeof(textbuf), fmt, args); +#ifdef CONFIG_DEBUG_LL + printascii(text); +#endif + /* mark and strip a trailing newline */ if (text_len && text[text_len-1] == '\n') { text_len--; -- GitLab From 2444662d5c77568a03e44ef54fda3c1c234caa44 Mon Sep 17 00:00:00 2001 From: Dima Zavin Date: Tue, 23 Aug 2011 15:56:50 -0700 Subject: [PATCH 0938/1442] ANDROID: ARM: add option to flush console before reboot If the console_lock was held while the system was rebooted, the messages in the temporary logbuffer would not have propogated to all the console drivers. This force releases the console lock if it failed to be acquired. Change-Id: I193dcf7b968be17966833e50b8b8bc70d5d9fe89 Signed-off-by: Dima Zavin --- arch/arm/Kconfig | 9 +++++++++ arch/arm/kernel/reboot.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b5d529fdffab..88830c13930e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1836,6 +1836,15 @@ config XEN help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. +config ARM_FLUSH_CONSOLE_ON_RESTART + bool "Force flush the console on restart" + help + If the console is locked while the system is rebooted, the messages + in the temporary logbuffer would not have propogated to all the + console drivers. This option forces the console lock to be + released if it failed to be acquired, which will cause all the + pending messages to be flushed. + endmenu menu "Boot options" diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 3fa867a2aae6..d704df89a546 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c @@ -6,6 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include #include @@ -122,6 +123,31 @@ void machine_power_off(void) pm_power_off(); } +#ifdef CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART +void arm_machine_flush_console(void) +{ + printk("\n"); + pr_emerg("Restarting %s\n", linux_banner); + if (console_trylock()) { + console_unlock(); + return; + } + + mdelay(50); + + local_irq_disable(); + if (!console_trylock()) + pr_emerg("arm_restart: Console was locked! Busting\n"); + else + pr_emerg("arm_restart: Console was locked!\n"); + console_unlock(); +} +#else +void arm_machine_flush_console(void) +{ +} +#endif + /* * Restart requires that the secondary CPUs stop performing any activity * while the primary CPU resets the system. Systems with a single CPU can @@ -138,6 +164,10 @@ void machine_restart(char *cmd) local_irq_disable(); smp_send_stop(); + /* Flush the console to make sure all the relevant messages make it + * out to the console drivers */ + arm_machine_flush_console(); + if (arm_pm_restart) arm_pm_restart(reboot_mode, cmd); else -- GitLab From 813f2675335f8035b7e7d94c7d7d8f046624f6ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Fri, 14 Jun 2013 19:54:40 -0700 Subject: [PATCH 0939/1442] ANDROID: ARM: Fix "Make low-level printk work" to use a separate config option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I5ca8db61b595adc642a07ea187bd41fd7636840e Signed-off-by: Arve Hjønnevåg --- arch/arm/Kconfig.debug | 8 ++++++++ kernel/printk/printk.c | 10 +++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index d83f7c369e51..17dcd9416db3 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1723,6 +1723,14 @@ config EARLY_PRINTK kernel low-level debugging functions. Add earlyprintk to your kernel parameters to enable this console. +config EARLY_PRINTK_DIRECT + bool "Early printk direct" + depends on DEBUG_LL + help + Say Y here if you want to have an early console using the + kernel low-level debugging functions and EARLY_PRINTK is + not early enough. + config ARM_KPROBES_TEST tristate "Kprobes test module" depends on KPROBES && MODULES diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index d4c86643258d..b38f3fb2a006 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -56,7 +56,7 @@ #include "braille.h" #include "internal.h" -#ifdef CONFIG_DEBUG_LL +#ifdef CONFIG_EARLY_PRINTK_DIRECT extern void printascii(char *); #endif @@ -1846,10 +1846,6 @@ asmlinkage int vprintk_emit(int facility, int level, */ text_len = vscnprintf(text, sizeof(textbuf), fmt, args); -#ifdef CONFIG_DEBUG_LL - printascii(text); -#endif - /* mark and strip a trailing newline */ if (text_len && text[text_len-1] == '\n') { text_len--; @@ -1878,6 +1874,10 @@ asmlinkage int vprintk_emit(int facility, int level, } } +#ifdef CONFIG_EARLY_PRINTK_DIRECT + printascii(text); +#endif + if (level == LOGLEVEL_DEFAULT) level = default_message_loglevel; -- GitLab From f556868afe08f8f395e47da28ece8545337f154b Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Mon, 29 Apr 2013 16:07:00 -0700 Subject: [PATCH 0940/1442] ANDROID: ARM: fault: assume no context when IRQs are disabled during data abort. Bail out early if IRQs are disabled in do_page_fault or else [14415.157266] BUG: sleeping function called from invalid context at arch/arm/mm/fault.c:301 Russell King's idea from http://comments.gmane.org/gmane.linux.ports.arm.omap/59256 Signed-off-by: JP Abgrall --- arch/arm/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 3a2e678b8d30..217ddb23fdf2 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -273,10 +273,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) local_irq_enable(); /* - * If we're in an interrupt or have no user + * If we're in an interrupt, or have no irqs, or have no user * context, we must not take the fault.. */ - if (faulthandler_disabled() || !mm) + if (faulthandler_disabled() || irqs_disabled() || !mm) goto no_context; if (user_mode(regs)) -- GitLab From 91d8631417bdfbee010fd0be4b19dee7cecaeb43 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 28 Aug 2014 14:00:10 -0700 Subject: [PATCH 0941/1442] ANDROID: arm64: check for upper PAGE_SHIFT bits in pfn_valid() pfn_valid() returns a false positive when the lower (64 - PAGE_SHIFT) bits match a valid pfn but some of the upper bits are set. This caused a kernel panic in kpageflags_read() when a userspace utility parsed /proc/*/pagemap, neglected to discard the upper flag bits, and tried to lseek()+read() from the corresponding offset in /proc/kpageflags. A valid pfn will never have the upper PAGE_SHIFT bits set, so simply check for this before passing the pfn to memblock_is_memory(). Change-Id: Ief5d8cd4dd93cbecd545a634a8d5885865cb5970 Signed-off-by: Greg Hackmann --- arch/arm64/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 212c4d1e2f26..af38d027fe2e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -145,9 +145,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #endif /* CONFIG_NUMA */ #ifdef CONFIG_HAVE_ARCH_PFN_VALID +#define PFN_MASK ((1UL << (64 - PAGE_SHIFT)) - 1) + int pfn_valid(unsigned long pfn) { - return memblock_is_map_memory(pfn << PAGE_SHIFT); + return (pfn & PFN_MASK) == pfn && memblock_is_map_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); #endif -- GitLab From c69559c952ecbdea42048650ec8db1278477dd3f Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 9 Sep 2014 17:36:05 -0700 Subject: [PATCH 0942/1442] ANDROID: arm64: process: dump memory around registers when displaying regs A port of 8608d7c4418c75841c562a90cddd9beae5798a48 to ARM64. Both the original code and this port are limited to dumping kernel addresses, so don't bother if the registers are from a userspace process. Change-Id: Idc76804c54efaaeb70311cbb500c54db6dac4525 Signed-off-by: Greg Hackmann --- arch/arm64/kernel/process.c | 66 +++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 01753cd7d3f0..2a474166d384 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -166,6 +166,70 @@ void machine_restart(char *cmd) while (1); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + unsigned int i; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->pc - nbytes, nbytes * 2, "PC"); + show_data(regs->regs[30] - nbytes, nbytes * 2, "LR"); + show_data(regs->sp - nbytes, nbytes * 2, "SP"); + for (i = 0; i < 30; i++) { + char name[4]; + snprintf(name, sizeof(name), "X%u", i); + show_data(regs->regs[i] - nbytes, nbytes * 2, name); + } + set_fs(fs); +} + void __show_regs(struct pt_regs *regs) { int i, top_reg; @@ -201,6 +265,8 @@ void __show_regs(struct pt_regs *regs) pr_cont("\n"); } + if (!user_mode(regs)) + show_extra_register_data(regs, 128); printk("\n"); } -- GitLab From a9c2864fd7ddc4fad1eef60905cf3d8f695e0181 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 1 Dec 2014 16:13:30 -0800 Subject: [PATCH 0943/1442] ANDROID: arch: arm64: force -fno-pic The aarch64-linux-android- toolchain enables -fpic by default. -fpic isn't needed for the kernel and breaks CONFIG_JUMP_LABEL, so turn it off. Change-Id: I685da1dc60e4cf1e9abcfb56e03654675ac02a0c Signed-off-by: Greg Hackmann --- arch/arm64/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 3635b8662724..2a79e2f665d5 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -40,6 +40,7 @@ endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) +KBUILD_CFLAGS += -fno-pic KBUILD_AFLAGS += $(lseinstr) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) -- GitLab From 7663ed4f922636bce91b7543665d4c55b5b54931 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Tue, 29 Sep 2015 18:57:35 -0700 Subject: [PATCH 0944/1442] ANDROID: arm64: pass return address to dma_common_contiguous_remap Added return address to show caller function in /proc/vmallocinfo Change-Id: Ieb0bbf6ec82b561cea6ff18f0516744050dfc269 --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3f74d0d98de6..3b7acaa8f0e8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -174,7 +174,7 @@ static void *__dma_alloc(struct device *dev, size_t size, /* create a coherent mapping */ page = virt_to_page(ptr); coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, - prot, NULL); + prot, __builtin_return_address(0)); if (!coherent_ptr) goto no_map; -- GitLab From c792cb6e6c94e3010f43d35394bec4d1abaccf47 Mon Sep 17 00:00:00 2001 From: Benoit Goby Date: Fri, 8 Nov 2013 15:24:19 -0800 Subject: [PATCH 0945/1442] ANDROID: ARM: Fix dtb list when DTB_IMAGE_NAMES is empty In the 3.10 kernel, dtb-y is not defined in Makefile.boot anymore but in dts/Makefile, so it needs to be included too. Change-Id: I6d6fccf933709bcb6220ce8f12b4b9e2a7c40d63 Signed-off-by: Benoit Goby --- arch/arm/boot/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 50f8d1be7fcb..42f3fdfc2a99 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -16,6 +16,7 @@ OBJCOPYFLAGS :=-O binary -R .comment -S ifneq ($(MACHINE),) include $(MACHINE)/Makefile.boot endif +include $(srctree)/arch/arm/boot/dts/Makefile # Note: the following conditions must always be true: # ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET) -- GitLab From 31747b73fc74dae5df1e0cdda9c9de71dff7dbb4 Mon Sep 17 00:00:00 2001 From: Erik Gilling Date: Mon, 25 Mar 2013 15:04:41 -0700 Subject: [PATCH 0946/1442] ANDROID: ARM: add config option to build zImage/dtb combo Allows a defconfig to set a default dtb to concatenate with a zImage to create a zImage-dtb. Signed-off-by: Erik Gilling Change-Id: I34b643b1c49228fbae88a56e46c93c478089620d --- arch/arm/Kconfig | 15 +++++++++++++++ arch/arm/Makefile | 2 ++ 2 files changed, 17 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 88830c13930e..2656c6642893 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1873,6 +1873,21 @@ config DEPRECATED_PARAM_STRUCT This was deprecated in 2001 and announced to live on for 5 years. Some old boot loaders still use this way. +config BUILD_ARM_APPENDED_DTB_IMAGE + bool "Build a concatenated zImage/dtb by default" + depends on OF + help + Enabling this option will cause a concatenated zImage and DTB to + be built by default (instead of a standalone zImage.) The image + will built in arch/arm/boot/zImage-dtb. + +config BUILD_ARM_APPENDED_DTB_IMAGE_NAME + string "Default dtb name" + depends on BUILD_ARM_APPENDED_DTB_IMAGE + help + name of the dtb to append when building a concatenated + zImage/dtb. + # Compressed boot loader in ROM. Yes, we really want to ask about # TEXT and BSS so we preserve their values in the config files. config ZBOOT_ROM_TEXT diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 6be9ee148b78..0ff187a4ed71 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -298,6 +298,8 @@ libs-y := arch/arm/lib/ $(libs-y) # Default target when executing plain make ifeq ($(CONFIG_XIP_KERNEL),y) KBUILD_IMAGE := xipImage +else ifeq ($(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE),y) +KBUILD_IMAGE := zImage-dtb.$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAME) else KBUILD_IMAGE := zImage endif -- GitLab From ccd6cd38fb19455feff6182956d3907a5fff68a9 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 17 Apr 2013 16:58:36 -0700 Subject: [PATCH 0947/1442] ANDROID: ARM: convert build of appended dtb zImage to list of dtbs Allow CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES to specify a space separated list of dtbs to append to the zImage, and name the resulting file zImage-dtb Change-Id: Ied5d0bafbd1d01fc1f109c15c4283de7029903c9 Signed-off-by: Colin Cross --- arch/arm/Kconfig | 14 +++++++------- arch/arm/Makefile | 5 ++++- arch/arm/boot/.gitignore | 1 + arch/arm/boot/Makefile | 12 ++++++++++++ arch/arm/boot/dts/Makefile | 12 +++++++++++- scripts/Makefile.lib | 6 ++++++ 6 files changed, 41 insertions(+), 9 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2656c6642893..00be82f3929f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1877,16 +1877,16 @@ config BUILD_ARM_APPENDED_DTB_IMAGE bool "Build a concatenated zImage/dtb by default" depends on OF help - Enabling this option will cause a concatenated zImage and DTB to - be built by default (instead of a standalone zImage.) The image - will built in arch/arm/boot/zImage-dtb. + Enabling this option will cause a concatenated zImage and list of + DTBs to be built by default (instead of a standalone zImage.) + The image will built in arch/arm/boot/zImage-dtb -config BUILD_ARM_APPENDED_DTB_IMAGE_NAME - string "Default dtb name" +config BUILD_ARM_APPENDED_DTB_IMAGE_NAMES + string "Default dtb names" depends on BUILD_ARM_APPENDED_DTB_IMAGE help - name of the dtb to append when building a concatenated - zImage/dtb. + Space separated list of names of dtbs to append when + building a concatenated zImage-dtb. # Compressed boot loader in ROM. Yes, we really want to ask about # TEXT and BSS so we preserve their values in the config files. diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 0ff187a4ed71..b53a7b4559d3 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -299,7 +299,7 @@ libs-y := arch/arm/lib/ $(libs-y) ifeq ($(CONFIG_XIP_KERNEL),y) KBUILD_IMAGE := xipImage else ifeq ($(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE),y) -KBUILD_IMAGE := zImage-dtb.$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAME) +KBUILD_IMAGE := zImage-dtb else KBUILD_IMAGE := zImage endif @@ -351,6 +351,9 @@ ifeq ($(CONFIG_VDSO),y) $(Q)$(MAKE) $(build)=arch/arm/vdso $@ endif +zImage-dtb: vmlinux scripts dtbs + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm/boot/.gitignore b/arch/arm/boot/.gitignore index 3c79f85975aa..ad7a0253ea96 100644 --- a/arch/arm/boot/.gitignore +++ b/arch/arm/boot/.gitignore @@ -4,3 +4,4 @@ xipImage bootpImage uImage *.dtb +zImage-dtb \ No newline at end of file diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 42f3fdfc2a99..da75630c440d 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -30,6 +30,14 @@ export ZRELADDR INITRD_PHYS PARAMS_PHYS targets := Image zImage xipImage bootpImage uImage +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST)) + ifeq ($(CONFIG_XIP_KERNEL),y) $(obj)/xipImage: vmlinux FORCE @@ -56,6 +64,10 @@ $(obj)/compressed/vmlinux: $(obj)/Image FORCE $(obj)/zImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) +$(obj)/zImage-dtb: $(obj)/zImage $(DTB_OBJS) FORCE + $(call if_changed,cat) + @echo ' Kernel: $@ is ready' + endif ifneq ($(LOADADDR),) diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index c558ba75cbcc..5af3ec14af9b 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -959,5 +959,15 @@ endif dtstree := $(srctree)/$(src) dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) -always := $(dtb-y) +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif + +targets += dtbs dtbs_install +targets += $(DTB_LIST) + +always := $(DTB_LIST) clean-files := *.dtb diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 0a07f9014944..d3d3320722cb 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -314,6 +314,12 @@ $(obj)/%.dtb: $(src)/%.dts FORCE dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp) +# cat +# --------------------------------------------------------------------------- +# Concatentate multiple files together +quiet_cmd_cat = CAT $@ +cmd_cat = (cat $(filter-out FORCE,$^) > $@) || (rm -f $@; false) + # Bzip2 # --------------------------------------------------------------------------- -- GitLab From 911da239d131256d492628137fe05a59419fe43f Mon Sep 17 00:00:00 2001 From: Alex Ray Date: Mon, 17 Mar 2014 13:44:01 -0700 Subject: [PATCH 0948/1442] ANDROID: ARM64: add option to build Image.gz/dtb combo Allows a defconfig to set a list of dtbs to concatenate with an Image.gz to create a Image.gz-dtb. Includes 8adb162 arm64: Fix correct dtb clean-files location Change-Id: I0b462322d5c970f1fdf37baffece7ad058099f4a Signed-off-by: Alex Ray --- arch/arm64/Kconfig | 15 +++++++++++++++ arch/arm64/Makefile | 8 ++++++++ arch/arm64/boot/.gitignore | 1 + arch/arm64/boot/Makefile | 13 +++++++++++++ arch/arm64/boot/dts/Makefile | 14 ++++++++++++++ 5 files changed, 51 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 969ef880d234..865c961d8cb8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -988,6 +988,21 @@ config DMI However, even with this option, the resultant kernel should continue to boot on existing non-UEFI platforms. +config BUILD_ARM64_APPENDED_DTB_IMAGE + bool "Build a concatenated Image.gz/dtb by default" + depends on OF + help + Enabling this option will cause a concatenated Image.gz and list of + DTBs to be built by default (instead of a standalone Image.gz.) + The image will built in arch/arm64/boot/Image.gz-dtb + +config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES + string "Default dtb names" + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + help + Space separated list of names of dtbs to append when + building a concatenated Image.gz-dtb. + endmenu menu "Userspace binary formats" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2a79e2f665d5..085f79235be0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -95,7 +95,12 @@ libs-y := arch/arm64/lib/ $(libs-y) core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make +ifeq ($(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE),y) +KBUILD_IMAGE := Image.gz-dtb +else KBUILD_IMAGE := Image.gz +endif + KBUILD_DTBS := dtbs all: $(KBUILD_IMAGE) $(KBUILD_DTBS) @@ -122,6 +127,9 @@ dtbs: prepare scripts dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(boot)/dts +Image.gz-dtb: vmlinux scripts dtbs + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index 8dab0bb6ae66..eb3551131b1e 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -1,2 +1,3 @@ Image Image.gz +Image.gz-dtb diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1f012c506434..5bb65a9e5d13 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,10 +14,20 @@ # Based on the ia64 boot/Makefile. # +include $(srctree)/arch/arm64/boot/dts/Makefile + OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.gz +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST)) + $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) @@ -36,6 +46,9 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) +$(obj)/Image.gz-dtb: $(obj)/Image.gz $(DTB_OBJS) FORCE + $(call if_changed,cat) + install: $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)" diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index 6684f97c2722..7ad2cf0a607b 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -28,3 +28,17 @@ dtstree := $(srctree)/$(src) dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(foreach d,$(dts-dirs), $(wildcard $(dtstree)/$(d)/*.dts))) always := $(dtb-y) + +targets += dtbs + +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +targets += $(DTB_LIST) + +dtbs: $(addprefix $(obj)/, $(DTB_LIST)) + +clean-files := dts/*.dtb *.dtb -- GitLab From ba01a66beb946d93592f23cef2cb9a0b1bc63211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Fri, 30 Nov 2012 17:05:40 -0800 Subject: [PATCH 0949/1442] ANDROID: ARM: decompressor: Flush tlb before swiching domain 0 to client mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the bootloader used a page table that is incompatible with domain 0 in client mode, and boots with the mmu on, then swithing domain 0 to client mode causes a fault if we don't flush the tlb after updating the page table pointer. v2: Add ISB before loading dacr. Signed-off-by: Arve Hjønnevåg --- arch/arm/boot/compressed/head.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index fc6d541549a2..51fc9fb6dc2c 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -781,6 +781,8 @@ __armv7_mmu_cache_on: bic r6, r6, #1 << 31 @ 32-bit translation system bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer + mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs + mcr p15, 0, r0, c7, c5, 4 @ ISB mcrne p15, 0, r1, c3, c0, 0 @ load domain access control mcrne p15, 0, r6, c2, c0, 2 @ load ttb control #endif -- GitLab From 4db7f78711fde5d1f7812a27ab2f7862215ab671 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Thu, 2 Feb 2012 22:58:28 -0800 Subject: [PATCH 0950/1442] ANDROID: of: Support CONFIG_CMDLINE_EXTEND config option The old logic assumes CMDLINE_FROM_BOOTLOADER vs. CMDLINE_FORCE and ignores CMDLINE_EXTEND. Here's the old logic: - CONFIG_CMDLINE_FORCE=true CONFIG_CMDLINE - dt bootargs=non-empty: dt bootargs - dt bootargs=empty, @data is non-empty string @data is left unchanged - dt bootargs=empty, @data is empty string CONFIG_CMDLINE (or "" if that's not defined) The new logic is now documented in of_fdt.h and is copied here for reference: - CONFIG_CMDLINE_FORCE=true CONFIG_CMDLINE - CONFIG_CMDLINE_EXTEND=true, @data is non-empty string @data + dt bootargs (even if dt bootargs are empty) - CONFIG_CMDLINE_EXTEND=true, @data is empty string CONFIG_CMDLINE + dt bootargs (even if dt bootargs are empty) - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=non-empty: dt bootargs - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is non-empty string @data is left unchanged - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is empty string CONFIG_CMDLINE (or "" if that's not defined) Signed-off-by: Doug Anderson CC: devicetree-discuss@lists.ozlabs.org CC: Grant Likely CC: Benjamin Herrenschmidt CC: Rob Herring Change-Id: I40ace250847f813358125dfcaa8998fd32cf7ea3 Signed-off-by: Colin Cross --- drivers/of/fdt.c | 62 +++++++++++++++++++++++++++--------------- include/linux/of_fdt.h | 21 ++++++++++++++ 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index c89d5d231a0e..d3ac492fbb59 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1054,6 +1054,29 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname, return 0; } +/* + * Convert configs to something easy to use in C code + */ +#if defined(CONFIG_CMDLINE_FORCE) +static const int overwrite_incoming_cmdline = 1; +static const int read_dt_cmdline; +static const int concat_cmdline; +#elif defined(CONFIG_CMDLINE_EXTEND) +static const int overwrite_incoming_cmdline; +static const int read_dt_cmdline = 1; +static const int concat_cmdline = 1; +#else /* CMDLINE_FROM_BOOTLOADER */ +static const int overwrite_incoming_cmdline; +static const int read_dt_cmdline = 1; +static const int concat_cmdline; +#endif + +#ifdef CONFIG_CMDLINE +static const char *config_cmdline = CONFIG_CMDLINE; +#else +static const char *config_cmdline = ""; +#endif + int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { @@ -1068,28 +1091,23 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, early_init_dt_check_for_initrd(node); - /* Retrieve command line */ - p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) - strlcpy(data, p, min((int)l, COMMAND_LINE_SIZE)); - - /* - * CONFIG_CMDLINE is meant to be a default in case nothing else - * managed to set the command line, unless CONFIG_CMDLINE_FORCE - * is set in which case we override whatever was found earlier. - */ -#ifdef CONFIG_CMDLINE -#if defined(CONFIG_CMDLINE_EXTEND) - strlcat(data, " ", COMMAND_LINE_SIZE); - strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#elif defined(CONFIG_CMDLINE_FORCE) - strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#else - /* No arguments from boot loader, use kernel's cmdl*/ - if (!((char *)data)[0]) - strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif -#endif /* CONFIG_CMDLINE */ + /* Put CONFIG_CMDLINE in if forced or if data had nothing in it to start */ + if (overwrite_incoming_cmdline || !((char *)data)[0]) + strlcpy(data, config_cmdline, COMMAND_LINE_SIZE); + + /* Retrieve command line unless forcing */ + if (read_dt_cmdline) { + p = of_get_flat_dt_prop(node, "bootargs", &l); + if (p != NULL && l > 0) { + if (concat_cmdline) { + strlcat(data, " ", COMMAND_LINE_SIZE); + strlcat(data, p, min_t(int, (int)l, + COMMAND_LINE_SIZE)); + } else + strlcpy(data, p, min_t(int, (int)l, + COMMAND_LINE_SIZE)); + } + } pr_debug("Command line is: %s\n", (char*)data); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 4341f32516d8..501d461a6a1d 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -63,6 +63,27 @@ extern int of_flat_dt_match(unsigned long node, const char *const *matches); extern unsigned long of_get_flat_dt_root(void); extern int of_get_flat_dt_size(void); +/* + * early_init_dt_scan_chosen - scan the device tree for ramdisk and bootargs + * + * The boot arguments will be placed into the memory pointed to by @data. + * That memory should be COMMAND_LINE_SIZE big and initialized to be a valid + * (possibly empty) string. Logic for what will be in @data after this + * function finishes: + * + * - CONFIG_CMDLINE_FORCE=true + * CONFIG_CMDLINE + * - CONFIG_CMDLINE_EXTEND=true, @data is non-empty string + * @data + dt bootargs (even if dt bootargs are empty) + * - CONFIG_CMDLINE_EXTEND=true, @data is empty string + * CONFIG_CMDLINE + dt bootargs (even if dt bootargs are empty) + * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=non-empty: + * dt bootargs + * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is non-empty string + * @data is left unchanged + * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is empty string + * CONFIG_CMDLINE (or "" if that's not defined) + */ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, -- GitLab From 8e56764990dfa9b76e3c5193bab0ee9cd0fe5ec9 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 2 Apr 2014 18:02:15 -0700 Subject: [PATCH 0951/1442] ANDROID: ARM64: copy CONFIG_CMDLINE_EXTEND from ARM Copy the config choice for CONFIG_CMDLINE_EXTEND from arch/arm/Kconfig, including CONFIG_CMDLINE_FROM_BOOTLOADER as the default. These will be used by drivers/of/fdt.c. Change-Id: I8416038498ddf8fc1e99ab06109825eb1492aa7f Signed-off-by: Colin Cross --- arch/arm64/Kconfig | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 865c961d8cb8..9d0edb767fd8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -949,6 +949,23 @@ config CMDLINE entering them here. As a minimum, you should specify the the root device (e.g. root=/dev/nfs). +choice + prompt "Kernel command line type" if CMDLINE != "" + default CMDLINE_FROM_BOOTLOADER + +config CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader. If + the boot loader doesn't provide any, the default kernel command + string provided in CMDLINE will be used. + +config CMDLINE_EXTEND + bool "Extend bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the default kernel command string. + config CMDLINE_FORCE bool "Always use the default kernel command string" help @@ -956,6 +973,7 @@ config CMDLINE_FORCE loader passes other arguments to the kernel. This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. +endchoice config EFI_STUB bool -- GitLab From e820270abb5d0a75072c719279597db621b3dce6 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 6 Mar 2013 19:10:29 -0800 Subject: [PATCH 0952/1442] ANDROID: of: fix CONFIG_CMDLINE_EXTEND strlcat takes the size of the buffer, not the number of characters to concatenate. If the size of the device tree command line p is larger than the CONFIG_CMDLINE string data, then strcat(data, p, l) will hit a BUG_ON because strlen(data) > l. Replace the second strlcat with a strncpy plus a manual null termination. Also rearrange the code to reduce indent depth to make it more readable, and replace data with a char *cmdline to avoid extra casts. Signed-off-by: Colin Cross --- drivers/of/fdt.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index d3ac492fbb59..f44f979e402d 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1080,32 +1080,38 @@ static const char *config_cmdline = ""; int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { - int l; - const char *p; + unsigned long l = 0; + char *p = NULL; + char *cmdline = data; pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - if (depth != 1 || !data || + if (depth != 1 || !cmdline || (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) return 0; early_init_dt_check_for_initrd(node); /* Put CONFIG_CMDLINE in if forced or if data had nothing in it to start */ - if (overwrite_incoming_cmdline || !((char *)data)[0]) - strlcpy(data, config_cmdline, COMMAND_LINE_SIZE); + if (overwrite_incoming_cmdline || !cmdline[0]) + strlcpy(cmdline, config_cmdline, COMMAND_LINE_SIZE); /* Retrieve command line unless forcing */ - if (read_dt_cmdline) { + if (read_dt_cmdline) p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) { - if (concat_cmdline) { - strlcat(data, " ", COMMAND_LINE_SIZE); - strlcat(data, p, min_t(int, (int)l, - COMMAND_LINE_SIZE)); - } else - strlcpy(data, p, min_t(int, (int)l, - COMMAND_LINE_SIZE)); + + if (p != NULL && l > 0) { + if (concat_cmdline) { + int cmdline_len; + int copy_len; + strlcat(cmdline, " ", COMMAND_LINE_SIZE); + cmdline_len = strlen(cmdline); + copy_len = COMMAND_LINE_SIZE - cmdline_len - 1; + copy_len = min((int)l, copy_len); + strncpy(cmdline + cmdline_len, p, copy_len); + cmdline[cmdline_len + copy_len] = '\0'; + } else { + strlcpy(cmdline, p, min((int)l, COMMAND_LINE_SIZE)); } } -- GitLab From 9a4a740554448e2c3092f1f9eda227867e6bcbb8 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 19 Nov 2015 13:45:41 -0800 Subject: [PATCH 0953/1442] ANDROID: of: Fix build warnings In commit d6cb004d80 (of: fix CONFIG_CMDLINE_EXTEND), the types of some variables in early_init_dt_scan_chosen() were modified, which results in build warnings. This patch resets the unsigned long to an int, and re-adds the const to the char*. Change-Id: Ie60ae92b4552e453cf477dd83f42838b3f95975e Signed-off-by: John Stultz --- drivers/of/fdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index f44f979e402d..86688088c50c 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1080,8 +1080,8 @@ static const char *config_cmdline = ""; int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { - unsigned long l = 0; - char *p = NULL; + int l = 0; + const char *p = NULL; char *cmdline = data; pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); -- GitLab From c3c2e99fcc3ed20ab49b482bdc102ebb8037ebd6 Mon Sep 17 00:00:00 2001 From: Rom Lemarchand Date: Mon, 6 Jul 2015 16:50:33 -0700 Subject: [PATCH 0954/1442] ANDROID: initramfs: Add skip_initramfs command line option Add a skip_initramfs option to allow choosing whether to boot using the initramfs or not at runtime. Change-Id: If30428fa748c1d4d3d7b9d97c1f781de5e4558c3 Signed-off-by: Rom Lemarchand --- include/linux/initramfs.h | 32 ++++++++++++++++++++++++++++++++ init/Makefile | 3 --- init/initramfs.c | 19 ++++++++++++++++++- init/noinitramfs.c | 9 ++++++++- 4 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 include/linux/initramfs.h diff --git a/include/linux/initramfs.h b/include/linux/initramfs.h new file mode 100644 index 000000000000..fc7da63b125b --- /dev/null +++ b/include/linux/initramfs.h @@ -0,0 +1,32 @@ +/* + * include/linux/initramfs.h + * + * Copyright (C) 2015, Google + * Rom Lemarchand + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_INITRAMFS_H +#define _LINUX_INITRAMFS_H + +#include + +#if IS_BUILTIN(CONFIG_BLK_DEV_INITRD) + +int __init default_rootfs(void); + +#endif + +#endif /* _LINUX_INITRAMFS_H */ diff --git a/init/Makefile b/init/Makefile index c4fb45525d08..de8e0aa42139 100644 --- a/init/Makefile +++ b/init/Makefile @@ -5,11 +5,8 @@ ccflags-y := -fno-function-sections -fno-data-sections obj-y := main.o version.o mounts.o -ifneq ($(CONFIG_BLK_DEV_INITRD),y) obj-y += noinitramfs.o -else obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o -endif obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o ifneq ($(CONFIG_ARCH_INIT_TASK),y) diff --git a/init/initramfs.c b/init/initramfs.c index b32ad7d97ac9..f8ce812ba43e 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -18,6 +18,7 @@ #include #include #include +#include static ssize_t __init xwrite(int fd, const char *p, size_t count) { @@ -605,9 +606,25 @@ static void __init clean_rootfs(void) } #endif +static int __initdata do_skip_initramfs; + +static int __init skip_initramfs_param(char *str) +{ + if (*str) + return 0; + do_skip_initramfs = 1; + return 1; +} +__setup("skip_initramfs", skip_initramfs_param); + static int __init populate_rootfs(void) { - char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); + char *err; + + if (do_skip_initramfs) + return default_rootfs(); + + err = unpack_to_rootfs(__initramfs_start, __initramfs_size); if (err) panic("%s", err); /* Failed to decompress INTERNAL initramfs */ if (initrd_start) { diff --git a/init/noinitramfs.c b/init/noinitramfs.c index 267739d85179..bcc8bcb053ee 100644 --- a/init/noinitramfs.c +++ b/init/noinitramfs.c @@ -21,11 +21,16 @@ #include #include #include +#include +#include /* * Create a simple rootfs that is similar to the default initramfs */ -static int __init default_rootfs(void) +#if !IS_BUILTIN(CONFIG_BLK_DEV_INITRD) +static +#endif +int __init default_rootfs(void) { int err; @@ -49,4 +54,6 @@ static int __init default_rootfs(void) printk(KERN_WARNING "Failed to create a rootfs\n"); return err; } +#if !IS_BUILTIN(CONFIG_BLK_DEV_INITRD) rootfs_initcall(default_rootfs); +#endif -- GitLab From f353d4f802c373d6667231863fc3637600edc12c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 9 Apr 2013 10:30:26 -0700 Subject: [PATCH 0955/1442] ANDROID: pstore: Update Documentation/android.txt Update Documentation/android.txt to reference PSTORE_CONSOLE and PSTORE_RAM instead of ANDROID_RAM_CONSOLE Change-Id: I2c56e73f8c65c3ddbe6ddbf1faadfacb42a09575 Reported-by: Jon Medhurst (Tixy) Signed-off-by: John Stultz --- Documentation/android.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/android.txt b/Documentation/android.txt index 72a62afdf202..0f40a78b045f 100644 --- a/Documentation/android.txt +++ b/Documentation/android.txt @@ -92,8 +92,8 @@ DNOTIFY 1.3 Recommended enabled config options ------------------------------ ANDROID_PMEM -ANDROID_RAM_CONSOLE -ANDROID_RAM_CONSOLE_ERROR_CORRECTION +PSTORE_CONSOLE +PSTORE_RAM SCHEDSTATS DEBUG_PREEMPT DEBUG_MUTEXES -- GitLab From 085351ff2016b8cdc80f75c733e8f69ea8691838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Fri, 2 May 2014 20:23:21 -0700 Subject: [PATCH 0956/1442] ANDROID: pstore/ram: Add ramoops_console_write_buf api MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow writing into the ramoops console buffer. Change-Id: Iff0d69b562e4dae33ea7f8d19412227bebb17e47 Signed-off-by: Arve Hjønnevåg --- fs/pstore/ram.c | 6 ++++++ include/linux/pstore_ram.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 6ad831b9d1b8..bd2dc34da789 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -549,6 +549,12 @@ static int ramoops_parse_dt(struct platform_device *pdev, return 0; } +void notrace ramoops_console_write_buf(const char *buf, size_t size) +{ + struct ramoops_context *cxt = &oops_cxt; + persistent_ram_write(cxt->cprz, buf, size); +} + static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index c668c861c96c..485cc8e3f7dd 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -71,6 +71,8 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz); ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, char *str, size_t len); +void ramoops_console_write_buf(const char *buf, size_t size); + /* * Ramoops platform data * @mem_size memory size for ramoops -- GitLab From 3e4578f42fd7bfe611e666d4dbe89eedc9f7a566 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 27 Oct 2015 16:42:08 -0700 Subject: [PATCH 0957/1442] ANDROID: mm: add a field to store names for private anonymous memory Userspace processes often have multiple allocators that each do anonymous mmaps to get memory. When examining memory usage of individual processes or systems as a whole, it is useful to be able to break down the various heaps that were allocated by each layer and examine their size, RSS, and physical memory usage. This patch adds a user pointer to the shared union in vm_area_struct that points to a null terminated string inside the user process containing a name for the vma. vmas that point to the same address will be merged, but vmas that point to equivalent strings at different addresses will not be merged. Userspace can set the name for a region of memory by calling prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name); Setting the name to NULL clears it. The names of named anonymous vmas are shown in /proc/pid/maps as [anon:] and in /proc/pid/smaps in a new "Name" field that is only present for named vmas. If the userspace pointer is no longer valid all or part of the name will be replaced with "". The idea to store a userspace pointer to reduce the complexity within mm (at the expense of the complexity of reading /proc/pid/mem) came from Dave Hansen. This results in no runtime overhead in the mm subsystem other than comparing the anon_name pointers when considering vma merging. The pointer is stored in a union with fieds that are only used on file-backed mappings, so it does not increase memory usage. Includes fix from Jed Davis for typo in prctl_set_vma_anon_name, which could attempt to set the name across two vmas at the same time due to a typo, which might corrupt the vma list. Fix it to use tmp instead of end to limit the name setting to a single vma at a time. Change-Id: I9aa7b6b5ef536cd780599ba4e2fba8ceebe8b59f Signed-off-by: Dmitry Shmidt --- Documentation/filesystems/proc.txt | 6 ++ fs/proc/task_mmu.c | 65 +++++++++++- fs/userfaultfd.c | 9 +- include/linux/mm.h | 2 +- include/linux/mm_types.h | 24 ++++- include/uapi/linux/prctl.h | 3 + kernel/sys.c | 152 +++++++++++++++++++++++++++++ mm/madvise.c | 2 +- mm/mempolicy.c | 3 +- mm/mlock.c | 2 +- mm/mmap.c | 39 +++++--- mm/mprotect.c | 2 +- 12 files changed, 281 insertions(+), 28 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 74329fd0add2..6e027ae50d7e 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -392,6 +392,8 @@ is not associated with a file: [stack] = the stack of the main process [vdso] = the "virtual dynamic shared object", the kernel system call handler + [anon:] = an anonymous mapping that has been + named by userspace or if empty, the mapping is anonymous. @@ -419,6 +421,7 @@ KernelPageSize: 4 kB MMUPageSize: 4 kB Locked: 0 kB VmFlags: rd ex mr mw me dw +Name: name from userspace the first of these lines shows the same information as is displayed for the mapping in /proc/PID/maps. The remaining lines show the size of the mapping @@ -486,6 +489,9 @@ Note that there is no guarantee that every flag and associated mnemonic will be present in all further kernel releases. Things get changed, the flags may be vanished or the reverse -- new added. +The "Name" field will only be present on a mapping that has been named by +userspace, and will show the name passed in by userspace. + This file is only present if the CONFIG_MMU kernel configuration option is enabled. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 35b92d81692f..65d28f9ed232 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -127,6 +127,56 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif +static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma) +{ + const char __user *name = vma_get_anon_name(vma); + struct mm_struct *mm = vma->vm_mm; + + unsigned long page_start_vaddr; + unsigned long page_offset; + unsigned long num_pages; + unsigned long max_len = NAME_MAX; + int i; + + page_start_vaddr = (unsigned long)name & PAGE_MASK; + page_offset = (unsigned long)name - page_start_vaddr; + num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE); + + seq_puts(m, "[anon:"); + + for (i = 0; i < num_pages; i++) { + int len; + int write_len; + const char *kaddr; + long pages_pinned; + struct page *page; + + pages_pinned = get_user_pages_remote(current, mm, + page_start_vaddr, 1, 0, &page, NULL); + if (pages_pinned < 1) { + seq_puts(m, "]"); + return; + } + + kaddr = (const char *)kmap(page); + len = min(max_len, PAGE_SIZE - page_offset); + write_len = strnlen(kaddr + page_offset, len); + seq_write(m, kaddr + page_offset, write_len); + kunmap(page); + put_page(page); + + /* if strnlen hit a null terminator then we're done */ + if (write_len != len) + break; + + max_len -= len; + page_offset = 0; + page_start_vaddr += PAGE_SIZE; + } + + seq_putc(m, ']'); +} + static void vma_stop(struct proc_maps_private *priv) { struct mm_struct *mm = priv->mm; @@ -345,8 +395,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - if (is_stack(priv, vma)) + if (is_stack(priv, vma)) { name = "[stack]"; + goto done; + } + + if (vma_get_anon_name(vma)) { + seq_pad(m, ' '); + seq_print_vma_name(m, vma); + } } done: @@ -760,6 +817,12 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) show_map_vma(m, vma, is_pid); + if (vma_get_anon_name(vma)) { + seq_puts(m, "Name: "); + seq_print_vma_name(m, vma); + seq_putc(m, '\n'); + } + seq_printf(m, "Size: %8lu kB\n" "Rss: %8lu kB\n" diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 85959d8324df..69c867c01d52 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -460,7 +460,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX); + NULL_VM_UFFD_CTX, + vma_get_anon_name(vma)); if (prev) vma = prev; else @@ -839,7 +840,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - ((struct vm_userfaultfd_ctx){ ctx })); + ((struct vm_userfaultfd_ctx){ ctx }), + vma_get_anon_name(vma)); if (prev) { vma = prev; goto next; @@ -976,7 +978,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX); + NULL_VM_UFFD_CTX, + vma_get_anon_name(vma)); if (prev) { vma = prev; goto next; diff --git a/include/linux/mm.h b/include/linux/mm.h index 44a8f6a5e1e3..46c927f1404c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1975,7 +1975,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx); + struct mempolicy *, struct vm_userfaultfd_ctx, const char __user *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4a8acedf4b7d..4d740f2638d6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -326,11 +326,18 @@ struct vm_area_struct { /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. + * + * For private anonymous mappings, a pointer to a null terminated string + * in the user process containing the name given to the vma, or NULL + * if unnamed. */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; + union { + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; + const char __user *anon_name; + }; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma @@ -615,4 +622,13 @@ typedef struct { unsigned long val; } swp_entry_t; +/* Return the name for an anonymous mapping or NULL for a file-backed mapping */ +static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma) +{ + if (vma->vm_file) + return NULL; + + return vma->anon_name; +} + #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index a8d0759a9e40..96b269bcfa17 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -197,4 +197,7 @@ struct prctl_mm_map { # define PR_CAP_AMBIENT_LOWER 3 # define PR_CAP_AMBIENT_CLEAR_ALL 4 +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 89d5be418157..1157bf7a448f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -41,6 +41,8 @@ #include #include #include +#include +#include #include #include @@ -2072,6 +2074,153 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) } #endif +#ifdef CONFIG_MMU +static int prctl_update_vma_anon_name(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, + const char __user *name_addr) +{ + struct mm_struct *mm = vma->vm_mm; + int error = 0; + pgoff_t pgoff; + + if (name_addr == vma_get_anon_name(vma)) { + *prev = vma; + goto out; + } + + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); + *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma, + vma->vm_file, pgoff, vma_policy(vma), + vma->vm_userfaultfd_ctx, name_addr); + if (*prev) { + vma = *prev; + goto success; + } + + *prev = vma; + + if (start != vma->vm_start) { + error = split_vma(mm, vma, start, 1); + if (error) + goto out; + } + + if (end != vma->vm_end) { + error = split_vma(mm, vma, end, 0); + if (error) + goto out; + } + +success: + if (!vma->vm_file) + vma->anon_name = name_addr; + +out: + if (error == -ENOMEM) + error = -EAGAIN; + return error; +} + +static int prctl_set_vma_anon_name(unsigned long start, unsigned long end, + unsigned long arg) +{ + unsigned long tmp; + struct vm_area_struct *vma, *prev; + int unmapped_error = 0; + int error = -EINVAL; + + /* + * If the interval [start,end) covers some unmapped address + * ranges, just ignore them, but return -ENOMEM at the end. + * - this matches the handling in madvise. + */ + vma = find_vma_prev(current->mm, start, &prev); + if (vma && start > vma->vm_start) + prev = vma; + + for (;;) { + /* Still start < end. */ + error = -ENOMEM; + if (!vma) + return error; + + /* Here start < (end|vma->vm_end). */ + if (start < vma->vm_start) { + unmapped_error = -ENOMEM; + start = vma->vm_start; + if (start >= end) + return error; + } + + /* Here vma->vm_start <= start < (end|vma->vm_end) */ + tmp = vma->vm_end; + if (end < tmp) + tmp = end; + + /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ + error = prctl_update_vma_anon_name(vma, &prev, start, tmp, + (const char __user *)arg); + if (error) + return error; + start = tmp; + if (prev && start < prev->vm_end) + start = prev->vm_end; + error = unmapped_error; + if (start >= end) + return error; + if (prev) + vma = prev->vm_next; + else /* madvise_remove dropped mmap_sem */ + vma = find_vma(current->mm, start); + } +} + +static int prctl_set_vma(unsigned long opt, unsigned long start, + unsigned long len_in, unsigned long arg) +{ + struct mm_struct *mm = current->mm; + int error; + unsigned long len; + unsigned long end; + + if (start & ~PAGE_MASK) + return -EINVAL; + len = (len_in + ~PAGE_MASK) & PAGE_MASK; + + /* Check to see whether len was rounded up from small -ve to zero */ + if (len_in && !len) + return -EINVAL; + + end = start + len; + if (end < start) + return -EINVAL; + + if (end == start) + return 0; + + down_write(&mm->mmap_sem); + + switch (opt) { + case PR_SET_VMA_ANON_NAME: + error = prctl_set_vma_anon_name(start, end, arg); + break; + default: + error = -EINVAL; + } + + up_write(&mm->mmap_sem); + + return error; +} +#else /* CONFIG_MMU */ +static int prctl_set_vma(unsigned long opt, unsigned long start, + unsigned long len_in, unsigned long arg) +{ + return -EINVAL; +} +#endif + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2270,6 +2419,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_FP_MODE: error = GET_FP_MODE(me); break; + case PR_SET_VMA: + error = prctl_set_vma(arg2, arg3, arg4, arg5); + break; default: error = -EINVAL; break; diff --git a/mm/madvise.c b/mm/madvise.c index 93fb63e88b5e..279627aace24 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -108,7 +108,7 @@ static long madvise_behavior(struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx); + vma->vm_userfaultfd_ctx, vma_get_anon_name(vma)); if (*prev) { vma = *prev; goto success; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0b859af06b87..238c4e8f2654 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -752,7 +752,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, ((vmstart - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, - new_pol, vma->vm_userfaultfd_ctx); + new_pol, vma->vm_userfaultfd_ctx, + vma_get_anon_name(vma)); if (prev) { vma = prev; next = vma->vm_next; diff --git a/mm/mlock.c b/mm/mlock.c index cdbed8aaa426..facf6e70e93f 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -529,7 +529,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx); + vma->vm_userfaultfd_ctx, vma_get_anon_name(vma)); if (*prev) { vma = *prev; goto success; diff --git a/mm/mmap.c b/mm/mmap.c index 1af87c14183d..590df3862840 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -956,7 +956,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, */ static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags, - struct vm_userfaultfd_ctx vm_userfaultfd_ctx) + struct vm_userfaultfd_ctx vm_userfaultfd_ctx, + const char __user *anon_name) { /* * VM_SOFTDIRTY should not prevent from VMA merging, if we @@ -974,6 +975,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma, return 0; if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx)) return 0; + if (vma_get_anon_name(vma) != anon_name) + return 0; return 1; } @@ -1006,9 +1009,10 @@ static int can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, - struct vm_userfaultfd_ctx vm_userfaultfd_ctx) + struct vm_userfaultfd_ctx vm_userfaultfd_ctx, + const char __user *anon_name) { - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) && + if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { if (vma->vm_pgoff == vm_pgoff) return 1; @@ -1027,9 +1031,10 @@ static int can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, - struct vm_userfaultfd_ctx vm_userfaultfd_ctx) + struct vm_userfaultfd_ctx vm_userfaultfd_ctx, + const char __user *anon_name) { - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) && + if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { pgoff_t vm_pglen; vm_pglen = vma_pages(vma); @@ -1040,9 +1045,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, } /* - * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out - * whether that can be merged with its predecessor or its successor. - * Or both (it neatly fills a hole). + * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name), + * figure out whether that can be merged with its predecessor or its + * successor. Or both (it neatly fills a hole). * * In most cases - when called for mmap, brk or mremap - [addr,end) is * certain not to be mapped by the time vma_merge is called; but when @@ -1084,7 +1089,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, unsigned long end, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t pgoff, struct mempolicy *policy, - struct vm_userfaultfd_ctx vm_userfaultfd_ctx) + struct vm_userfaultfd_ctx vm_userfaultfd_ctx, + const char __user *anon_name) { pgoff_t pglen = (end - addr) >> PAGE_SHIFT; struct vm_area_struct *area, *next; @@ -1117,7 +1123,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, anon_vma, file, pgoff, - vm_userfaultfd_ctx)) { + vm_userfaultfd_ctx, + anon_name)) { /* * OK, it can. Can we now merge in the successor as well? */ @@ -1126,7 +1133,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, - vm_userfaultfd_ctx) && + vm_userfaultfd_ctx, + anon_name) && is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) { /* cases 1, 6 */ @@ -1149,7 +1157,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, mpol_equal(policy, vma_policy(next)) && can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, - vm_userfaultfd_ctx)) { + vm_userfaultfd_ctx, + anon_name)) { if (prev && addr < prev->vm_end) /* case 4 */ err = __vma_adjust(prev, prev->vm_start, addr, prev->vm_pgoff, NULL, next); @@ -1627,7 +1636,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * Can we just expand an old mapping? */ vma = vma_merge(mm, prev, addr, addr + len, vm_flags, - NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX); + NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL); if (vma) goto out; @@ -2858,7 +2867,7 @@ static int do_brk(unsigned long addr, unsigned long request) /* Can we just expand an old private anonymous mapping? */ vma = vma_merge(mm, prev, addr, addr + len, flags, - NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX); + NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL); if (vma) goto out; @@ -3019,7 +3028,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return NULL; /* should never get here */ new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx); + vma->vm_userfaultfd_ctx, vma_get_anon_name(vma)); if (new_vma) { /* * Source vma may have been merged into new_vma diff --git a/mm/mprotect.c b/mm/mprotect.c index 11936526b08b..2c4083682f5c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -302,7 +302,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *pprev = vma_merge(mm, *pprev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx); + vma->vm_userfaultfd_ctx, vma_get_anon_name(vma)); if (*pprev) { vma = *pprev; VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); -- GitLab From 47f5e0529054e824f1e663d2df77a10e90d89a85 Mon Sep 17 00:00:00 2001 From: San Mehat Date: Wed, 29 Jul 2009 20:21:28 -0700 Subject: [PATCH 0958/1442] ANDROID: serial_core: Add wake_peer uart operation Add wake_peer which is called before starting UART TX. The idea here is to provide a mechanism where we can wakeup our peer before sending data. Change-Id: I42e0779b635f64ca99184b45d5b028de80197491 Signed-off-by: San Mehat --- drivers/tty/serial/serial_core.c | 3 +++ include/linux/serial_core.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index f2303f390345..2e2b88aa3004 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -131,6 +131,9 @@ static void __uart_start(struct tty_struct *tty) struct uart_state *state = tty->driver_data; struct uart_port *port = state->uart_port; + if (port && port->ops->wake_peer) + port->ops->wake_peer(port); + if (port && !uart_tx_stopped(port)) port->ops->start_tx(port); } diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 344201437017..1a943975f09c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -66,6 +66,7 @@ struct uart_ops { void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); + void (*wake_peer)(struct uart_port *); /* * Return a string describing the type of the port -- GitLab From 1672c66bee79f8913a664863bd066e4b09c8f01f Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Wed, 24 Aug 2011 15:01:30 -0700 Subject: [PATCH 0959/1442] ANDROID: fuse: Freeze client on suspend when request sent to userspace Suspend attempts can abort when the FUSE daemon is already frozen and a client is waiting uninterruptibly for a response, causing freezing of tasks to fail. Use the freeze-friendly wait API, but disregard other signals. Change-Id: Icefb7e4bbc718ccb76bf3c04daaa5eeea7e0e63c Signed-off-by: Todd Poynor --- fs/fuse/dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 70ea57c7b6bb..a9e1d708e1e9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -19,6 +19,7 @@ #include #include #include +#include MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); @@ -449,7 +450,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) * Either request is already in userspace, or it was forced. * Wait it out. */ - wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags)); + while (!test_bit(FR_FINISHED, &req->flags)) + wait_event_freezable(req->waitq, + test_bit(FR_FINISHED, &req->flags)); } static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) -- GitLab From b1173ef5128715a2ddec231e82e819d815d60a2f Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Thu, 30 Aug 2012 23:09:14 -0700 Subject: [PATCH 0960/1442] ANDROID: w1: ds2482: Manage SLPZ pin sleep state Place SLPZ pin in sleep state at system suspend time if a GPIO is provided by board platform data. Change-Id: I93c61fa0ae474e968e0f909209c9bfcaafe3dd2c Signed-off-by: Todd Poynor --- drivers/w1/masters/ds2482.c | 47 +++++++++++++++++++++++++++- include/linux/platform_data/ds2482.h | 21 +++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 include/linux/platform_data/ds2482.h diff --git a/drivers/w1/masters/ds2482.c b/drivers/w1/masters/ds2482.c index 2e30db1b1a43..fa13fa8c81af 100644 --- a/drivers/w1/masters/ds2482.c +++ b/drivers/w1/masters/ds2482.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include "../w1.h" @@ -97,7 +99,8 @@ static const u8 ds2482_chan_rd[8] = static int ds2482_probe(struct i2c_client *client, const struct i2c_device_id *id); static int ds2482_remove(struct i2c_client *client); - +static int ds2482_suspend(struct device *dev); +static int ds2482_resume(struct device *dev); /** * Driver data (common to all clients) @@ -108,9 +111,15 @@ static const struct i2c_device_id ds2482_id[] = { }; MODULE_DEVICE_TABLE(i2c, ds2482_id); +static const struct dev_pm_ops ds2482_pm_ops = { + .suspend = ds2482_suspend, + .resume = ds2482_resume, +}; + static struct i2c_driver ds2482_driver = { .driver = { .name = "ds2482", + .pm = &ds2482_pm_ops, }, .probe = ds2482_probe, .remove = ds2482_remove, @@ -132,6 +141,7 @@ struct ds2482_w1_chan { struct ds2482_data { struct i2c_client *client; struct mutex access_lock; + int slpz_gpio; /* 1-wire interface(s) */ int w1_count; /* 1 or 8 */ @@ -460,11 +470,31 @@ static u8 ds2482_w1_set_pullup(void *data, int delay) return retval; } +static int ds2482_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ds2482_data *data = i2c_get_clientdata(client); + + if (data->slpz_gpio >= 0) + gpio_set_value(data->slpz_gpio, 0); + return 0; +} + +static int ds2482_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ds2482_data *data = i2c_get_clientdata(client); + + if (data->slpz_gpio >= 0) + gpio_set_value(data->slpz_gpio, 1); + return 0; +} static int ds2482_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct ds2482_data *data; + struct ds2482_platform_data *pdata; int err = -ENODEV; int temp1; int idx; @@ -531,6 +561,16 @@ static int ds2482_probe(struct i2c_client *client, } } + pdata = client->dev.platform_data; + data->slpz_gpio = pdata ? pdata->slpz_gpio : -1; + + if (data->slpz_gpio >= 0) { + err = gpio_request_one(data->slpz_gpio, GPIOF_OUT_INIT_HIGH, + "ds2482.slpz"); + if (err < 0) + goto exit_w1_remove; + } + return 0; exit_w1_remove: @@ -555,6 +595,11 @@ static int ds2482_remove(struct i2c_client *client) w1_remove_master_device(&data->w1_ch[idx].w1_bm); } + if (data->slpz_gpio >= 0) { + gpio_set_value(data->slpz_gpio, 0); + gpio_free(data->slpz_gpio); + } + /* Free the memory */ kfree(data); return 0; diff --git a/include/linux/platform_data/ds2482.h b/include/linux/platform_data/ds2482.h new file mode 100644 index 000000000000..5a6879e2a09a --- /dev/null +++ b/include/linux/platform_data/ds2482.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2012 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __PLATFORM_DATA_DS2482__ +#define __PLATFORM_DATA_DS2482__ + +struct ds2482_platform_data { + int slpz_gpio; +}; + +#endif /* __PLATFORM_DATA_DS2482__ */ -- GitLab From b794bd357eb86fbb8d5264534ac998c28654ae7c Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Wed, 30 Dec 2015 09:26:15 -0800 Subject: [PATCH 0961/1442] ANDROID: rtc-palmas: correct for bcd year Replace bcd2bin and bin2bcd with one that maps years 1970 to 2129 in a pattern that works with the underlying hardware. The only transition that does not work correctly for this rtc clock is the transition from 2099 to 2100, it proceeds to 2000. The rtc clock retains and transitions the year correctly in all other circumstances. Signed-off-by: Mark Salyzyn Bug: 26346842 Change-Id: Ie527700190b1ae4b4bc3c12279d875aa5985b168 --- drivers/rtc/rtc-palmas.c | 44 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index 4bcfb88674d3..34aea38ebfa6 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c @@ -45,6 +45,42 @@ struct palmas_rtc { /* Total number of RTC registers needed to set time*/ #define PALMAS_NUM_TIME_REGS (PALMAS_YEARS_REG - PALMAS_SECONDS_REG + 1) +/* + * Special bin2bcd mapping to deal with bcd storage of year. + * + * 0-69 -> 0xD0 + * 70-99 (1970 - 1999) -> 0xD0 - 0xF9 (correctly rolls to 0x00) + * 100-199 (2000 - 2099) -> 0x00 - 0x99 (does not roll to 0xA0 :-( ) + * 200-229 (2100 - 2129) -> 0xA0 - 0xC9 (really for completeness) + * 230- -> 0xC9 + * + * Confirmed: the only transition that does not work correctly for this rtc + * clock is the transition from 2099 to 2100, it proceeds to 2000. We will + * accept this issue since the clock retains and transitions the year correctly + * in all other conditions. + */ +static unsigned char year_bin2bcd(int val) +{ + if (val < 70) + return 0xD0; + if (val < 100) + return bin2bcd(val - 20) | 0x80; /* KISS leverage of bin2bcd */ + if (val >= 230) + return 0xC9; + if (val >= 200) + return bin2bcd(val - 180) | 0x80; + return bin2bcd(val - 100); +} + +static int year_bcd2bin(unsigned char val) +{ + if (val >= 0xD0) + return bcd2bin(val & 0x7F) + 20; + if (val >= 0xA0) + return bcd2bin(val & 0x7F) + 180; + return bcd2bin(val) + 100; +} + static int palmas_rtc_read_time(struct device *dev, struct rtc_time *tm) { unsigned char rtc_data[PALMAS_NUM_TIME_REGS]; @@ -71,7 +107,7 @@ static int palmas_rtc_read_time(struct device *dev, struct rtc_time *tm) tm->tm_hour = bcd2bin(rtc_data[2]); tm->tm_mday = bcd2bin(rtc_data[3]); tm->tm_mon = bcd2bin(rtc_data[4]) - 1; - tm->tm_year = bcd2bin(rtc_data[5]) + 100; + tm->tm_year = year_bcd2bin(rtc_data[5]); return ret; } @@ -87,7 +123,7 @@ static int palmas_rtc_set_time(struct device *dev, struct rtc_time *tm) rtc_data[2] = bin2bcd(tm->tm_hour); rtc_data[3] = bin2bcd(tm->tm_mday); rtc_data[4] = bin2bcd(tm->tm_mon + 1); - rtc_data[5] = bin2bcd(tm->tm_year - 100); + rtc_data[5] = year_bin2bcd(tm->tm_year); /* Stop RTC while updating the RTC time registers */ ret = palmas_update_bits(palmas, PALMAS_RTC_BASE, PALMAS_RTC_CTRL_REG, @@ -142,7 +178,7 @@ static int palmas_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) alm->time.tm_hour = bcd2bin(alarm_data[2]); alm->time.tm_mday = bcd2bin(alarm_data[3]); alm->time.tm_mon = bcd2bin(alarm_data[4]) - 1; - alm->time.tm_year = bcd2bin(alarm_data[5]) + 100; + alm->time.tm_year = year_bcd2bin(alarm_data[5]); ret = palmas_read(palmas, PALMAS_RTC_BASE, PALMAS_RTC_INTERRUPTS_REG, &int_val); @@ -173,7 +209,7 @@ static int palmas_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) alarm_data[2] = bin2bcd(alm->time.tm_hour); alarm_data[3] = bin2bcd(alm->time.tm_mday); alarm_data[4] = bin2bcd(alm->time.tm_mon + 1); - alarm_data[5] = bin2bcd(alm->time.tm_year - 100); + alarm_data[5] = year_bin2bcd(alm->time.tm_year); ret = palmas_bulk_write(palmas, PALMAS_RTC_BASE, PALMAS_ALARM_SECONDS_REG, alarm_data, PALMAS_NUM_TIME_REGS); -- GitLab From aaf78ec6c898b72f756bdb7d9ea43c5c15b8ae18 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Fri, 11 Jan 2013 13:51:48 -0800 Subject: [PATCH 0962/1442] ANDROID: hardlockup: detect hard lockups without NMIs using secondary cpus Emulate NMIs on systems where they are not available by using timer interrupts on other cpus. Each cpu will use its softlockup hrtimer to check that the next cpu is processing hrtimer interrupts by verifying that a counter is increasing. This patch is useful on systems where the hardlockup detector is not available due to a lack of NMIs, for example most ARM SoCs. Without this patch any cpu stuck with interrupts disabled can cause a hardware watchdog reset with no debugging information, but with this patch the kernel can detect the lockup and panic, which can result in useful debugging info. Change-Id: Ia5faf50243e19c1755201212e04c8892d929785a Signed-off-by: Colin Cross --- include/linux/nmi.h | 5 +- kernel/watchdog.c | 123 ++++++++++++++++++++++++++++++++++++++++++-- lib/Kconfig.debug | 14 ++++- 3 files changed, 135 insertions(+), 7 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a78c35cff1ae..780949dff0ed 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,8 +14,11 @@ * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR_NMI) #include +#endif + +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) extern void touch_nmi_watchdog(void); #else static inline void touch_nmi_watchdog(void) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f280ec..cd33631c7257 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -104,6 +104,11 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +#endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU +static cpumask_t __read_mostly watchdog_cpus; +#endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif static unsigned long soft_lockup_nmi_warn; @@ -287,7 +292,7 @@ void touch_softlockup_watchdog_sync(void) __this_cpu_write(watchdog_touch_ts, 0); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI /* watchdog detector functions */ static bool is_hardlockup(void) { @@ -301,6 +306,76 @@ static bool is_hardlockup(void) } #endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU +static unsigned int watchdog_next_cpu(unsigned int cpu) +{ + cpumask_t cpus = watchdog_cpus; + unsigned int next_cpu; + + next_cpu = cpumask_next(cpu, &cpus); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(&cpus); + + if (next_cpu == cpu) + return nr_cpu_ids; + + return next_cpu; +} + +static int is_hardlockup_other_cpu(unsigned int cpu) +{ + unsigned long hrint = per_cpu(hrtimer_interrupts, cpu); + + if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) + return 1; + + per_cpu(hrtimer_interrupts_saved, cpu) = hrint; + return 0; +} + +static void watchdog_check_hardlockup_other_cpu(void) +{ + unsigned int next_cpu; + + /* + * Test for hardlockups every 3 samples. The sample period is + * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over + * watchdog_thresh (over by 20%). + */ + if (__this_cpu_read(hrtimer_interrupts) % 3 != 0) + return; + + /* check for a hardlockup on the next cpu */ + next_cpu = watchdog_next_cpu(smp_processor_id()); + if (next_cpu >= nr_cpu_ids) + return; + + smp_rmb(); + + if (per_cpu(watchdog_nmi_touch, next_cpu) == true) { + per_cpu(watchdog_nmi_touch, next_cpu) = false; + return; + } + + if (is_hardlockup_other_cpu(next_cpu)) { + /* only warn once */ + if (per_cpu(hard_watchdog_warn, next_cpu) == true) + return; + + if (hardlockup_panic) + panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu); + else + WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu); + + per_cpu(hard_watchdog_warn, next_cpu) = true; + } else { + per_cpu(hard_watchdog_warn, next_cpu) = false; + } +} +#else +static inline void watchdog_check_hardlockup_other_cpu(void) { return; } +#endif + static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(); @@ -313,7 +388,7 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, @@ -376,7 +451,7 @@ static void watchdog_overflow_callback(struct perf_event *event, __this_cpu_write(hard_watchdog_warn, false); return; } -#endif /* CONFIG_HARDLOCKUP_DETECTOR */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */ static void watchdog_interrupt_count(void) { @@ -400,6 +475,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* kick the hardlockup detector */ watchdog_interrupt_count(); + /* test for hardlockups on the next cpu */ + watchdog_check_hardlockup_other_cpu(); + /* kick the softlockup detector */ wake_up_process(__this_cpu_read(softlockup_watchdog)); @@ -577,7 +655,7 @@ static void watchdog(unsigned int cpu) watchdog_nmi_disable(cpu); } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI /* * People like the simple clean cpu node info on boot. * Reduce the watchdog noise by only printing messages @@ -675,10 +753,45 @@ static void watchdog_nmi_disable(unsigned int cpu) } } +#else +#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU +static int watchdog_nmi_enable(unsigned int cpu) +{ + /* + * The new cpu will be marked online before the first hrtimer interrupt + * runs on it. If another cpu tests for a hardlockup on the new cpu + * before it has run its first hrtimer, it will get a false positive. + * Touch the watchdog on the new cpu to delay the first check for at + * least 3 sampling periods to guarantee one hrtimer has run on the new + * cpu. + */ + per_cpu(watchdog_nmi_touch, cpu) = true; + smp_wmb(); + cpumask_set_cpu(cpu, &watchdog_cpus); + return 0; +} + +static void watchdog_nmi_disable(unsigned int cpu) +{ + unsigned int next_cpu = watchdog_next_cpu(cpu); + + /* + * Offlining this cpu will cause the cpu before this one to start + * checking the one after this one. If this cpu just finished checking + * the next cpu and updating hrtimer_interrupts_saved, and then the + * previous cpu checks it within one sample period, it will trigger a + * false positive. Touch the watchdog on the next cpu to prevent it. + */ + if (next_cpu < nr_cpu_ids) + per_cpu(watchdog_nmi_touch, next_cpu) = true; + smp_wmb(); + cpumask_clear_cpu(cpu, &watchdog_cpus); +} #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } static void watchdog_nmi_disable(unsigned int cpu) { return; } -#endif /* CONFIG_HARDLOCKUP_DETECTOR */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */ static struct smp_hotplug_thread watchdog_threads = { .store = &softlockup_watchdog, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a6c8db1d62f6..1941f80206f8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -764,15 +764,27 @@ config LOCKUP_DETECTOR The overhead should be minimal. A periodic hrtimer runs to generate interrupts and kick the watchdog task every 4 seconds. An NMI is generated every 10 seconds or so to check for hardlockups. + If NMIs are not available on the platform, every 12 seconds the + hrtimer interrupt on one cpu will be used to check for hardlockups + on the next cpu. The frequency of hrtimer and NMI events and the soft and hard lockup thresholds can be controlled through the sysctl watchdog_thresh. -config HARDLOCKUP_DETECTOR +config HARDLOCKUP_DETECTOR_NMI def_bool y depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI +config HARDLOCKUP_DETECTOR_OTHER_CPU + def_bool y + depends on LOCKUP_DETECTOR && SMP + depends on !HARDLOCKUP_DETECTOR_NMI && !HAVE_NMI_WATCHDOG + +config HARDLOCKUP_DETECTOR + def_bool y + depends on HARDLOCKUP_DETECTOR_NMI || HARDLOCKUP_DETECTOR_OTHER_CPU + config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" depends on HARDLOCKUP_DETECTOR -- GitLab From cc14b9e149a11eaaecace45d5da8b2ab57902e9b Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 23 Jul 2014 16:55:07 -0700 Subject: [PATCH 0963/1442] ANDROID: ext4: Add support for FIDTRIM, a best-effort ioctl for deep discard trim * What This provides an interface for issuing an FITRIM which uses the secure discard instead of just a discard. Only the eMMC command is "secure", and not how the FS uses it: due to the fact that the FS might reassign a region somewhere else, the original deleted data will not be affected by the "trim" which only handles un-used regions. So we'll just call it "deep discard", and note that this is a "best effort" cleanup. * Why Once in a while, We want to be able to cleanup most of the unused blocks after erasing a bunch of files. We don't want to constantly secure-discard via a mount option. From an eMMC spec perspective, it tells the device to really get rid of all the data for the specified blocks and not just put them back into the pool of free ones (unlike the normal TRIM). The eMMC spec says the secure trim handling must make sure the data (and metadata) is not available anymore. A simple TRIM doesn't clear the data, it just puts blocks in the free pool. JEDEC Standard No. 84-A441 7.6.9 Secure Erase 7.6.10 Secure Trim From an FS perspective, it is acceptable to leave some data behind. - directory entries related to deleted files - databases entries related to deleted files - small-file data stored in inode extents - blocks held by the FS waiting to be re-used (mitigated by sync). - blocks reassigned by the FS prior to FIDTRIM. Change-Id: I676a1404a80130d93930c84898360f2e6fb2f81e Signed-off-by: Geremy Condra Signed-off-by: JP Abgrall --- fs/ext4/ext4.h | 3 ++- fs/ext4/ioctl.c | 7 ++++++- fs/ext4/mballoc.c | 28 ++++++++++++++++++---------- include/uapi/linux/fs.h | 2 ++ 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a8a750f59621..20ee0e4a5829 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2450,7 +2450,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); -extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); +extern int ext4_trim_fs(struct super_block *, struct fstrim_range *, + unsigned long blkdev_flags); /* inode.c */ int ext4_inode_is_fast_symlink(struct inode *inode); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bf5ae8ebbc97..cec9280950b8 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -735,11 +735,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return err; } + case FIDTRIM: case FITRIM: { struct request_queue *q = bdev_get_queue(sb->s_bdev); struct fstrim_range range; int ret = 0; + int flags = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -747,13 +749,16 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!blk_queue_discard(q)) return -EOPNOTSUPP; + if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secure_erase(q)) + return -EOPNOTSUPP; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; range.minlen = max((unsigned int)range.minlen, q->limits.discard_granularity); - ret = ext4_trim_fs(sb, &range); + ret = ext4_trim_fs(sb, &range, flags); if (ret < 0) return ret; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f418f55c2bbe..81e4b5610d6e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2770,7 +2770,8 @@ int ext4_mb_release(struct super_block *sb) } static inline int ext4_issue_discard(struct super_block *sb, - ext4_group_t block_group, ext4_grpblk_t cluster, int count) + ext4_group_t block_group, ext4_grpblk_t cluster, int count, + unsigned long flags) { ext4_fsblk_t discard_block; @@ -2779,7 +2780,7 @@ static inline int ext4_issue_discard(struct super_block *sb, count = EXT4_C2B(EXT4_SB(sb), count); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); + return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags); } /* @@ -2801,7 +2802,7 @@ static void ext4_free_data_callback(struct super_block *sb, if (test_opt(sb, DISCARD)) { err = ext4_issue_discard(sb, entry->efd_group, entry->efd_start_cluster, - entry->efd_count); + entry->efd_count, 0); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%d failed" @@ -4847,7 +4848,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * them with group lock_held */ if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, block_group, bit, count); + err = ext4_issue_discard(sb, block_group, bit, count, + 0); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%lu failed" @@ -5043,13 +5045,15 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, * @count: number of blocks to TRIM * @group: alloc. group we are working with * @e4b: ext4 buddy for the group + * @blkdev_flags: flags for the block device * * Trim "count" blocks starting at "start" in the "group". To assure that no * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ static int ext4_trim_extent(struct super_block *sb, int start, int count, - ext4_group_t group, struct ext4_buddy *e4b) + ext4_group_t group, struct ext4_buddy *e4b, + unsigned long blkdev_flags) __releases(bitlock) __acquires(bitlock) { @@ -5070,7 +5074,7 @@ __acquires(bitlock) */ mb_mark_used(e4b, &ex); ext4_unlock_group(sb, group); - ret = ext4_issue_discard(sb, group, start, count); + ret = ext4_issue_discard(sb, group, start, count, blkdev_flags); ext4_lock_group(sb, group); mb_free_blocks(NULL, e4b, start, ex.fe_len); return ret; @@ -5083,6 +5087,7 @@ __acquires(bitlock) * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count + * @blkdev_flags: flags for the block device * * ext4_trim_all_free walks through group's buddy bitmap searching for free * extents. When the free block is found, ext4_trim_extent is called to TRIM @@ -5097,7 +5102,7 @@ __acquires(bitlock) static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks) + ext4_grpblk_t minblocks, unsigned long blkdev_flags) { void *bitmap; ext4_grpblk_t next, count = 0, free_count = 0; @@ -5130,7 +5135,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if ((next - start) >= minblocks) { ret = ext4_trim_extent(sb, start, - next - start, group, &e4b); + next - start, group, &e4b, + blkdev_flags); if (ret && ret != -EOPNOTSUPP) break; ret = 0; @@ -5172,6 +5178,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, * ext4_trim_fs() -- trim ioctl handle function * @sb: superblock for filesystem * @range: fstrim_range structure + * @blkdev_flags: flags for the block device * * start: First Byte to trim * len: number of Bytes to trim from start @@ -5180,7 +5187,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, * start to start+len. For each such a group ext4_trim_all_free function * is invoked to trim all free space. */ -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range, + unsigned long blkdev_flags) { struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; @@ -5236,7 +5244,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen); + end, minlen, blkdev_flags); if (cnt < 0) { ret = cnt; break; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index acb2b6152ba0..92d9c68c696c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -236,6 +236,8 @@ struct fsxattr { #define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) #define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range) +#define FIDTRIM _IOWR('f', 128, struct fstrim_range) /* Deep discard trim */ + #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) #define FS_IOC_GETVERSION _IOR('v', 1, long) -- GitLab From b236cdb8f40f79261a8e1f8ef9e1545a51a570ba Mon Sep 17 00:00:00 2001 From: Rom Lemarchand Date: Thu, 5 Feb 2015 16:07:59 -0800 Subject: [PATCH 0964/1442] ANDROID: kbuild: make it possible to specify the module output dir Make modinst_dir user-defined on the command line. This allows to do things like: make MODLIB=output/ modinst_dir=. modules_install to ensure all the .ko are in the output/ directory. Change-Id: I2bc007eea27ee744d35289e26e4a8ac43ba04151 Signed-off-by: Rom Lemarchand --- scripts/Makefile.modinst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 07650eeaaf06..6f4c3f5a7ae3 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -29,7 +29,7 @@ quiet_cmd_modules_install = INSTALL $@ INSTALL_MOD_DIR ?= extra ext-mod-dir = $(INSTALL_MOD_DIR)$(subst $(patsubst %/,%,$(KBUILD_EXTMOD)),,$(@D)) -modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D)) +modinst_dir ?= $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D)) $(modules): $(call cmd,modules_install,$(MODLIB)/$(modinst_dir)) -- GitLab From 9cce66d785f58587e8ceb5fb816697937d542376 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 5 Jan 2016 17:36:31 +0530 Subject: [PATCH 0965/1442] ANDROID: kbuild: Makefile.clean: make Kbuild and Makefile optional AOSP commit b13ce9f4aa6f "ARM64: add option to build Image.gz/dtb combo" broke archclean / mrproper build targets and we run into: ---------- ./scripts/Makefile.clean:14: arch/arm64/boot/amd/Makefile: No such file or directory make[2]: *** No rule to make target `arch/arm64/boot/amd/Makefile'. Stop. make[1]: *** [arch/arm64/boot/amd] Error 2 make: *** [archclean] Error 2 ---------- This patch skip the missing Kbuild/Makefile reporting error. It does the job (i.e cleanup dts/*/*.dtb and do not spit out missing file error messages as well). Signed-off-by: Amit Pundir --- scripts/Makefile.clean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index 50616ea25131..2e70c6f06354 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -11,7 +11,7 @@ include scripts/Kbuild.include # The filename Kbuild has precedence over Makefile kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) -include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile) +-include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile) # Figure out what we need to build from the various variables # ========================================================================== -- GitLab From ebab868d9044f1650440b1ee039671fc56e84039 Mon Sep 17 00:00:00 2001 From: Christian Poetzsch Date: Fri, 24 Jul 2015 16:42:58 +0100 Subject: [PATCH 0966/1442] ANDROID: Fix for in kernel emergency remount when loop mounts are used adb reboot calls /proc/sysrq-trigger to force an emergency remount (ro) of all mounted disks. This is executed in the order of the time the mount was originally done. Because we have a test system which loop mount images from an extra partition, we see errors cause the loop mounted partitions gets remounted after this physical partition was set to read only already. Fix this by reversing the order of the emergency remount. This will remount the disk first which have been mounted last. So instead of remounting in this order: /dev/sda1 /dev/loop1 /dev/loop2 we now remount in this order: /dev/loop2 /dev/loop1 /dev/sda1 Change-Id: I68fe7e16cc9400ab5278877af70c9ea1d9b57936 Signed-off-by: Christian Poetzsch --- fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/super.c b/fs/super.c index c183835566c1..0bed501a60be 100644 --- a/fs/super.c +++ b/fs/super.c @@ -837,7 +837,7 @@ static void do_emergency_remount(struct work_struct *work) struct super_block *sb, *p = NULL; spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { + list_for_each_entry_reverse(sb, &super_blocks, s_list) { if (hlist_unhashed(&sb->s_instances)) continue; sb->s_count++; -- GitLab From eb5d5508df713ecf0d30ff04ab918a99eadc50dd Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:16 +0000 Subject: [PATCH 0967/1442] ANDROID: epoll: use freezable blocking call Avoid waking up every thread sleeping in an epoll_wait call during suspend and resume by calling a freezable blocking call. Previous patches modified the freezer to avoid sending wakeups to threads that are blocked in freezable blocking calls. This call was selected to be converted to a freezable call because it doesn't hold any locks or release any resources when interrupted that might be needed by another freezing task or a kernel driver during suspend, and is a common site where idle userspace tasks are blocked. Change-Id: I848d08d28c89302fd42bbbdfa76489a474ab27bf Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 10db91218933..dc4a34f9c859 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1663,7 +1664,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } spin_unlock_irqrestore(&ep->lock, flags); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!freezable_schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); -- GitLab From fe1e63abc31c63ceb3e5757d2010a0b2046275db Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Sat, 26 Dec 2015 20:25:02 +0530 Subject: [PATCH 0968/1442] ANDROID: goldfish: pipe: fix platform_no_drv_owner.cocci warnings No need to set .owner here. The core will do it. Remove .owner field if calls are used which set it automatically Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci CC: Lizhe Liu Signed-off-by: Fengguang Wu Signed-off-by: Amit Pundir --- drivers/platform/goldfish/goldfish_pipe.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 1aba2c74160e..198d16da025d 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -662,7 +662,6 @@ static struct platform_driver goldfish_pipe = { .remove = goldfish_pipe_remove, .driver = { .name = "goldfish_pipe", - .owner = THIS_MODULE, .of_match_table = goldfish_pipe_of_match, .acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match), } -- GitLab From 78ab6c44983d584193396358691919f261483b16 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Tue, 10 Nov 2015 10:53:55 -0800 Subject: [PATCH 0969/1442] ANDROID: wakeup: Add the guard condition for len in pm_get_active_wakeup_sources Check if the len is not greater than maximum to prevent buffer overflow. Signed-off-by: Ruchi Kandoi Change-Id: I575b0a72bb5448b68353408d71fa8b83420c9088 --- drivers/base/power/wakeup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index e899fdafcd74..16d307b13ccc 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -811,7 +811,7 @@ void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max) rcu_read_lock(); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - if (ws->active) { + if (ws->active && len < max) { if (!active) len += scnprintf(pending_wakeup_source, max, "Pending Wakeup Sources: "); -- GitLab From f9f1014d29c7a2415a973acba6518d302f56d391 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 29 Dec 2015 12:59:31 +0530 Subject: [PATCH 0970/1442] ANDROID: skip building drivers as modules Few Android drivers e.g. uid_cputime, keyreset/combo fail to build as kernel modules. Instead of fixing the build lets make these drivers non-modular (switch config to "bool" from "tristate" in Kconfig) since Android doesn't support building kernel modules anyway. Change-Id: I50c66228c38c8b7c221ec7d6773fcb59bd201be9 Signed-off-by: Amit Pundir --- drivers/input/Kconfig | 10 ++-------- drivers/misc/Kconfig | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 5d954cc8cab5..34ffa0257b4b 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -188,24 +188,18 @@ config INPUT_APMPOWER module will be called apm-power. config INPUT_KEYRESET - tristate "Reset key" + bool "Reset key" depends on INPUT select INPUT_KEYCOMBO ---help--- Say Y here if you want to reboot when some keys are pressed; - To compile this driver as a module, choose M here: the - module will be called keyreset. - config INPUT_KEYCOMBO - tristate "Key combo" + bool "Key combo" depends on INPUT ---help--- Say Y here if you want to take action when some keys are pressed; - To compile this driver as a module, choose M here: the - module will be called keycombo. - comment "Input Device Drivers" source "drivers/input/keyboard/Kconfig" diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 1e3e5b88264f..8d0e3475480f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -767,7 +767,7 @@ config PANEL_BOOT_MESSAGE printf()-formatted message is valid with newline and escape codes. config UID_CPUTIME - tristate "Per-UID cpu time statistics" + bool "Per-UID cpu time statistics" depends on PROFILING help Per UID based cpu time statistics exported to /proc/uid_cputime -- GitLab From 01d8cc5436f219a4a757d93c93176d14545b4060 Mon Sep 17 00:00:00 2001 From: Tim Murray Date: Tue, 19 Jan 2016 16:36:40 -0800 Subject: [PATCH 0971/1442] ANDROID: mmc: move to a SCHED_FIFO thread (cherry picked from commit 011e507b413393eab8279dac8b778ad9b6e9971b) Running mmcqd as a prio 120 thread forces it to compete with standard user processes for IO performance, especially when the system is under severe CPU load. Move it to a SCHED_FIFO thread to reduce the impact of load on IO performance. Signed-off-by: Tim Murray Bug: 25392275 Change-Id: I1edfe73baa25e181367c30c1f40fee886e92b60d --- drivers/mmc/card/queue.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 8037f73a109a..1810f765f0d1 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -19,6 +19,7 @@ #include #include +#include #include "queue.h" #include "block.h" @@ -53,6 +54,11 @@ static int mmc_queue_thread(void *d) { struct mmc_queue *mq = d; struct request_queue *q = mq->queue; + struct sched_param scheduler_params = {0}; + + scheduler_params.sched_priority = 1; + + sched_setscheduler(current, SCHED_FIFO, &scheduler_params); current->flags |= PF_MEMALLOC; -- GitLab From 5fb1ae07029fdaf8b9ea00f07344d1e8f7d419af Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Feb 2016 17:40:05 -0800 Subject: [PATCH 0972/1442] ANDROID: lowmemorykiller: fix declaration order warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/staging/android/lowmemorykiller.c: In function ‘lowmem_scan’: drivers/staging/android/lowmemorykiller.c:174:3: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement] Change-Id: I9de6cf2c374bc43131725a7ed666a033a4449ea9 Signed-off-by: Brian Norris --- drivers/staging/android/lowmemorykiller.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index eaae2f7921e2..6dcde85f05dc 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -163,14 +163,15 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) p->comm, p->pid, oom_score_adj, tasksize); } if (selected) { + long cache_size = other_file * (long)(PAGE_SIZE / 1024); + long cache_limit = minfree * (long)(PAGE_SIZE / 1024); + long free = other_free * (long)(PAGE_SIZE / 1024); + task_lock(selected); send_sig(SIGKILL, selected, 0); if (selected->mm) task_set_lmk_waiting(selected); task_unlock(selected); - long cache_size = other_file * (long)(PAGE_SIZE / 1024); - long cache_limit = minfree * (long)(PAGE_SIZE / 1024); - long free = other_free * (long)(PAGE_SIZE / 1024); trace_lowmemory_kill(selected, cache_size, cache_limit, free); lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" " to free %ldkB on behalf of '%s' (%d) because\n" -- GitLab From 557d80f284d4027f40b230732df654068b59e8d6 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Feb 2016 17:44:51 -0800 Subject: [PATCH 0973/1442] ANDROID: usb: gadget: f_mtp: don't use le16 for u8 field The 'bCount' field is u8. Noticed by this warning: drivers/usb/gadget/function/f_mtp.c:264:3: warning: large integer implicitly truncated to unsigned type [-Woverflow] Change-Id: Ie82dfd1a8986ecd3acf143e41c46822f0d1aca4f Signed-off-by: Brian Norris --- drivers/usb/gadget/function/f_mtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index aec7b8d61fe7..8f80a7e91314 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -261,7 +261,7 @@ struct { .dwLength = __constant_cpu_to_le32(sizeof(mtp_ext_config_desc)), .bcdVersion = __constant_cpu_to_le16(0x0100), .wIndex = __constant_cpu_to_le16(4), - .bCount = __constant_cpu_to_le16(1), + .bCount = 1, }, .function = { .bFirstInterfaceNumber = 0, -- GitLab From b5ea92ffa88e9d4d6ba8fc2da32be942ed87ce88 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Feb 2016 17:42:29 -0800 Subject: [PATCH 0974/1442] ANDROID: kernel/watchdog: fix unused variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel/watchdog.c:122:22: warning: ‘hardlockup_allcpu_dumped’ defined but not used [-Wunused-variable] Change-Id: I99e97e7cc31b589cd674fd4495832c9ef036d0b9 Signed-off-by: Brian Norris --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index cd33631c7257..a1f78d4b41b6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -120,7 +120,7 @@ static unsigned long soft_lockup_nmi_warn; #ifdef CONFIG_HARDLOCKUP_DETECTOR unsigned int __read_mostly hardlockup_panic = CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; -static unsigned long hardlockup_allcpu_dumped; +static unsigned long __maybe_unused hardlockup_allcpu_dumped; /* * We may not want to enable hard lockup detection by default in all cases, * for example when running the kernel as a guest on a hypervisor. In these -- GitLab From 4a623fb5ce7a641b08fc187d6380f9524da21c15 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Tue, 10 Nov 2015 14:11:46 -0800 Subject: [PATCH 0975/1442] ANDROID: mmc: sdio: Disable retuning in sdio_reset_comm() Since sdio_reset_comm() re-initializes the SDIO card, disable retuning before idling and shutting down the card. Tuning will be re-enabled (if necessary) in mmc_sdio_init_card(). BUG=chrome-os-partner:46444 TEST=With CL:311815, toggle WiFi on/off on Smaug and observe that the WiFi card comes back up and is able to tune successfully. Change-Id: Ib4a5cfd4d75fc9e3ed7bb3f1e2ffd30de16c5d28 Signed-off-by: Andrew Bresticker Reviewed-on: https://chromium-review.googlesource.com/311797 Reviewed-by: Derek Basehore [briannorris: brought from Chromium kernel in 3.18 -> 4.4 rebase] Signed-off-by: Brian Norris --- drivers/mmc/core/sdio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 324116600333..b5ec3c8cf580 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -22,6 +22,7 @@ #include "core.h" #include "bus.h" +#include "host.h" #include "sd.h" #include "sdio_bus.h" #include "mmc_ops.h" @@ -1226,6 +1227,8 @@ int sdio_reset_comm(struct mmc_card *card) printk("%s():\n", __func__); mmc_claim_host(host); + mmc_retune_disable(host); + mmc_go_idle(host); mmc_set_clock(host, host->f_min); -- GitLab From 9079f248f697f5d79ac75b8bae919fbadabcbfdc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 1 Mar 2016 09:44:17 -0800 Subject: [PATCH 0976/1442] ANDROID: net: pppolac/pppopns: Replace msg.msg_iov with iov_iter_kvec() Commit 1af89c1ef3b6 ("Hack: net: PPPoPNS and PPPoLAC build fixes for 4.1") fixed the build for PPPoPNS and PPPoLAC by re-introducing a field in struct msghdr which was removed upstream. Re-introducing the field doesn't get it used, so it is quite likely that the code never worked. Fix it up for good. Fixes: 1af89c1ef3b6 ("Hack: net: PPPoPNS and PPPoLAC build fixes for 4.1") Signed-off-by: Guenter Roeck --- drivers/net/ppp/pppolac.c | 9 ++++----- drivers/net/ppp/pppopns.c | 9 ++++----- include/linux/socket.h | 4 ---- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c index 1b8180cc1d4d..0184c96579e9 100644 --- a/drivers/net/ppp/pppolac.c +++ b/drivers/net/ppp/pppolac.c @@ -206,11 +206,10 @@ static void pppolac_xmit_core(struct work_struct *delivery_work) while ((skb = skb_dequeue(&delivery_queue))) { struct sock *sk_udp = skb->sk; struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len}; - struct msghdr msg = { - .msg_iov = (struct iovec *)&iov, - .msg_iovlen = 1, - .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT, - }; + struct msghdr msg = { 0 }; + + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, + skb->len); sk_udp->sk_prot->sendmsg(sk_udp, &msg, skb->len); kfree_skb(skb); } diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c index 568bb45cfeac..d9e06039794e 100644 --- a/drivers/net/ppp/pppopns.c +++ b/drivers/net/ppp/pppopns.c @@ -189,11 +189,10 @@ static void pppopns_xmit_core(struct work_struct *delivery_work) while ((skb = skb_dequeue(&delivery_queue))) { struct sock *sk_raw = skb->sk; struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len}; - struct msghdr msg = { - .msg_iov = (struct iovec *)&iov, - .msg_iovlen = 1, - .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT, - }; + struct msghdr msg = { 0 }; + + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, + skb->len); sk_raw->sk_prot->sendmsg(sk_raw, &msg, skb->len); kfree_skb(skb); } diff --git a/include/linux/socket.h b/include/linux/socket.h index 09524774ea3f..b5cc5a6d7011 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -47,10 +47,6 @@ struct linger { struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ -#if defined(CONFIG_PPPOLAC) || defined(CONFIG_PPPOPNS) - struct iovec *msg_iov; /* scatter/gather array */ - __kernel_size_t msg_iovlen; /* # elements in msg_iov */ -#endif struct iov_iter msg_iter; /* data */ void *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ -- GitLab From a4912d5074f48285373f4f3125a99833fbcc5500 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 3 Mar 2016 10:33:40 -0800 Subject: [PATCH 0977/1442] ANDROID: PM / suspend: Add dependency on RTC_LIB Commit 1eff8f99f9f9 ("PM / Suspend: Print wall time at suspend entry and exit") calls rtc_time_to_tm(), which in turn calls rtc_time64_to_tm(). Since RTC_LIB is not mandatory for all architetures, this can result in the following build error. suspend.c:(.text+0x2f36c): undefined reference to `rtc_time64_to_tm' rtc_time64_to_tm() is implemented in rtc-lib, so SUSPEND now needs to select RTC_LIB. Fixes: 1eff8f99f9f9 ("PM / Suspend: Print wall time at suspend entry and exit") Signed-off-by: Guenter Roeck --- kernel/power/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index e8517b63eb37..f4330a2842a3 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -1,6 +1,7 @@ config SUSPEND bool "Suspend to RAM and standby" depends on ARCH_SUSPEND_POSSIBLE + select RTC_LIB default y ---help--- Allow the system to enter sleep states in which main memory is -- GitLab From 7961972600baf8bb5588b7fefbf0455deb9542bf Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 4 Mar 2016 07:22:27 -0800 Subject: [PATCH 0978/1442] ANDROID: power: Provide dummy log_suspend_abort_reason() if SUSPEND is disabled The API to log the suspend reason was introduced with commit 57caa2ad5ce3 ("power: Adds functionality to log the last suspend abort reason."). It is called from functions enabled with PM_SLEEP and from functions enabled with SUSPEND, but only available if SUSPEND is enabled. This can result in build failures such as the following if PM_SLEEP is enabled, but SUSPEND is not. kernel/built-in.o: In function `try_to_freeze_tasks': process.c:(.text+0x30928): undefined reference to `log_suspend_abort_reason' drivers/built-in.o: In function `syscore_suspend': (.text+0x6e250): undefined reference to `log_suspend_abort_reason' drivers/built-in.o: In function `__device_suspend': main.c:(.text+0x7a528): undefined reference to `log_suspend_abort_reason' Fixes: 57caa2ad5ce3 ("power: Adds functionality to log the last suspend abort reason.") Signed-off-by: Guenter Roeck --- include/linux/wakeup_reason.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index ad8b76936c7f..d84d8c301546 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -21,7 +21,12 @@ #define MAX_SUSPEND_ABORT_LEN 256 void log_wakeup_reason(int irq); -void log_suspend_abort_reason(const char *fmt, ...); int check_wakeup_reason(int irq); +#ifdef CONFIG_SUSPEND +void log_suspend_abort_reason(const char *fmt, ...); +#else +static inline void log_suspend_abort_reason(const char *fmt, ...) { } +#endif + #endif /* _LINUX_WAKEUP_REASON_H */ -- GitLab From e97c4ed917526212c67cb732099bef047c8a0df2 Mon Sep 17 00:00:00 2001 From: Tim Murray Date: Tue, 19 Jan 2016 16:33:27 -0800 Subject: [PATCH 0979/1442] ANDROID: dm-crypt: run in a WQ_HIGHPRI workqueue (cherry pick from commit ad3ac5180979e5dd1f84e4a807f76fb9fb19f814) Running dm-crypt in a standard workqueue results in IO competing for CPU time with standard user apps, which can lead to pipeline bubbles and seriously degraded performance. Move to a WQ_HIGHPRI workqueue to protect against that. Signed-off-by: Tim Murray Bug: 25392275 Change-Id: I2828587c754a7c2cafdd78b3323b9896cb8cd4e7 --- drivers/md/dm-crypt.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a2768835d394..715cc52ab01b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1863,16 +1863,24 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ret = -ENOMEM; - cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1); + cc->io_queue = alloc_workqueue("kcryptd_io", + WQ_HIGHPRI | + WQ_MEM_RECLAIM, + 1); if (!cc->io_queue) { ti->error = "Couldn't create kcryptd io queue"; goto bad; } if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) - cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1); + cc->crypt_queue = alloc_workqueue("kcryptd", + WQ_HIGHPRI | + WQ_MEM_RECLAIM, 1); else - cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, + cc->crypt_queue = alloc_workqueue("kcryptd", + WQ_HIGHPRI | + WQ_MEM_RECLAIM | + WQ_UNBOUND, num_online_cpus()); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; -- GitLab From b115a6fe6d4a66e89f7c43c4027db7ddb677c751 Mon Sep 17 00:00:00 2001 From: Mark Kuo Date: Thu, 20 Aug 2015 13:01:46 +0800 Subject: [PATCH 0980/1442] CHROMIUM: usb: gadget: f_mtp: Add SuperSpeed support Add SuperSpeed endpoint and companion descriptors. BUG=chrome-os-partner:43682 TEST=Smaug enumerates as a SuperSpeed device. Change-Id: I2bf3125d180fcb07222a5740fa67f3526cf3e95c Signed-off-by: Hui Fu Signed-off-by: Henry Lin Signed-off-by: Mark Kuo Signed-off-by: Andrew Bresticker Reviewed-on: https://chromium-review.googlesource.com/294950 --- drivers/usb/gadget/function/f_mtp.c | 76 ++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 8f80a7e91314..148f8fcecc80 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -135,6 +135,34 @@ static struct usb_interface_descriptor ptp_interface_desc = { .bInterfaceProtocol = 1, }; +static struct usb_endpoint_descriptor mtp_ss_in_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_IN, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(1024), +}; + +static struct usb_ss_ep_comp_descriptor mtp_ss_in_comp_desc = { + .bLength = sizeof(mtp_ss_in_comp_desc), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + /* .bMaxBurst = DYNAMIC, */ +}; + +static struct usb_endpoint_descriptor mtp_ss_out_desc = { + .bLength = USB_DT_ENDPOINT_SIZE, + .bDescriptorType = USB_DT_ENDPOINT, + .bEndpointAddress = USB_DIR_OUT, + .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = __constant_cpu_to_le16(1024), +}; + +static struct usb_ss_ep_comp_descriptor mtp_ss_out_comp_desc = { + .bLength = sizeof(mtp_ss_out_comp_desc), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + /* .bMaxBurst = DYNAMIC, */ +}; + static struct usb_endpoint_descriptor mtp_highspeed_in_desc = { .bLength = USB_DT_ENDPOINT_SIZE, .bDescriptorType = USB_DT_ENDPOINT, @@ -174,6 +202,12 @@ static struct usb_endpoint_descriptor mtp_intr_desc = { .bInterval = 6, }; +static struct usb_ss_ep_comp_descriptor mtp_intr_ss_comp_desc = { + .bLength = sizeof(mtp_intr_ss_comp_desc), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + .wBytesPerInterval = cpu_to_le16(2), +}; + static struct usb_descriptor_header *fs_mtp_descs[] = { (struct usb_descriptor_header *) &mtp_interface_desc, (struct usb_descriptor_header *) &mtp_fullspeed_in_desc, @@ -190,6 +224,17 @@ static struct usb_descriptor_header *hs_mtp_descs[] = { NULL, }; +static struct usb_descriptor_header *ss_mtp_descs[] = { + (struct usb_descriptor_header *) &mtp_interface_desc, + (struct usb_descriptor_header *) &mtp_ss_in_desc, + (struct usb_descriptor_header *) &mtp_ss_in_comp_desc, + (struct usb_descriptor_header *) &mtp_ss_out_desc, + (struct usb_descriptor_header *) &mtp_ss_out_comp_desc, + (struct usb_descriptor_header *) &mtp_intr_desc, + (struct usb_descriptor_header *) &mtp_intr_ss_comp_desc, + NULL, +}; + static struct usb_descriptor_header *fs_ptp_descs[] = { (struct usb_descriptor_header *) &ptp_interface_desc, (struct usb_descriptor_header *) &mtp_fullspeed_in_desc, @@ -206,6 +251,17 @@ static struct usb_descriptor_header *hs_ptp_descs[] = { NULL, }; +static struct usb_descriptor_header *ss_ptp_descs[] = { + (struct usb_descriptor_header *) &ptp_interface_desc, + (struct usb_descriptor_header *) &mtp_ss_in_desc, + (struct usb_descriptor_header *) &mtp_ss_in_comp_desc, + (struct usb_descriptor_header *) &mtp_ss_out_desc, + (struct usb_descriptor_header *) &mtp_ss_out_comp_desc, + (struct usb_descriptor_header *) &mtp_intr_desc, + (struct usb_descriptor_header *) &mtp_intr_ss_comp_desc, + NULL, +}; + static struct usb_string mtp_string_defs[] = { /* Naming interface "MTP" so libmtp will recognize us */ [INTERFACE_STRING_INDEX].s = "MTP", @@ -1131,10 +1187,24 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f) mtp_highspeed_out_desc.bEndpointAddress = mtp_fullspeed_out_desc.bEndpointAddress; } + /* support super speed hardware */ + if (gadget_is_superspeed(c->cdev->gadget)) { + unsigned max_burst; + + /* Calculate bMaxBurst, we know packet size is 1024 */ + max_burst = min_t(unsigned, MTP_BULK_BUFFER_SIZE / 1024, 15); + mtp_ss_in_desc.bEndpointAddress = + mtp_fullspeed_in_desc.bEndpointAddress; + mtp_ss_in_comp_desc.bMaxBurst = max_burst; + mtp_ss_out_desc.bEndpointAddress = + mtp_fullspeed_out_desc.bEndpointAddress; + mtp_ss_out_comp_desc.bMaxBurst = max_burst; + } DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n", - gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full", - f->name, dev->ep_in->name, dev->ep_out->name); + gadget_is_superspeed(c->cdev->gadget) ? "super" : + (gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full"), + f->name, dev->ep_in->name, dev->ep_out->name); return 0; } @@ -1410,9 +1480,11 @@ struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi, if (mtp_config) { dev->function.fs_descriptors = fs_mtp_descs; dev->function.hs_descriptors = hs_mtp_descs; + dev->function.ss_descriptors = ss_mtp_descs; } else { dev->function.fs_descriptors = fs_ptp_descs; dev->function.hs_descriptors = hs_ptp_descs; + dev->function.ss_descriptors = ss_ptp_descs; } dev->function.bind = mtp_function_bind; dev->function.unbind = mtp_function_unbind; -- GitLab From 09cd36b89af6158a3fdefaa4322af723e6e6981f Mon Sep 17 00:00:00 2001 From: Mark Kuo Date: Fri, 11 Sep 2015 16:12:59 +0800 Subject: [PATCH 0981/1442] CHROMIUM: usb: gadget: f_mtp: fix usb_ss_ep_comp_descriptor wBytesPerInterval in SuperSpeed Endpoint Companion Descriptor needs to be set large enough to reserve enough bus time for associated periodic endpoint. Originally, wBytesPerInterval for mtp's interrupt IN endpoint is set to 2 and its single interrupt transfer will be split into many 2 bytes interrupt transfers. So, we change wBytesPerInterval to INTR_BUFFER_SIZE to ensure interrupt transfer will not be split. BUG=none TEST=Smaug works as a MTP device Change-Id: I49c0df892b2d9e0193a684eef23f73664ced9f91 Signed-off-by: Henry Lin Signed-off-by: Mark Kuo Reviewed-on: https://chromium-review.googlesource.com/299091 Reviewed-by: Andrew Bresticker --- drivers/usb/gadget/function/f_mtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 148f8fcecc80..7f5c390885fe 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -205,7 +205,7 @@ static struct usb_endpoint_descriptor mtp_intr_desc = { static struct usb_ss_ep_comp_descriptor mtp_intr_ss_comp_desc = { .bLength = sizeof(mtp_intr_ss_comp_desc), .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, - .wBytesPerInterval = cpu_to_le16(2), + .wBytesPerInterval = cpu_to_le16(INTR_BUFFER_SIZE), }; static struct usb_descriptor_header *fs_mtp_descs[] = { -- GitLab From 1aaf997fd27068fb79d24b81848102f7d95e7fce Mon Sep 17 00:00:00 2001 From: Mark Kuo Date: Mon, 11 Jan 2016 17:49:16 +0800 Subject: [PATCH 0982/1442] CHROMIUM: usb: gadget: audio_source: add .free_func callback When userspace unbinds gadget functions through configfs, the .free_func() callback is always invoked. (in config_usb_cfg_unlink()) Implement it as a no-op to avoid the following crash: [ 68.125679] configfs-gadget gadget: unbind function 'accessory'/ffffffc0720bf000 [ 68.133202] configfs-gadget gadget: unbind function 'audio_source'/ffffffc0012ca3c0 [ 68.142668] tegra-xudc 700d0000.usb-device: ep 0 disabled [ 68.148186] Bad mode in Synchronous Abort handler detected, code 0x86000006 [ 68.155144] CPU: 2 PID: 1 Comm: init Tainted: G U W 3.18.0-09419-g87296c3-dirty #561 [ 68.163743] Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT) [ 68.169566] task: ffffffc0bc8d0000 ti: ffffffc0bc8bc000 task.ti: ffffffc0bc8bc000 [ 68.177039] PC is at 0x0 [ 68.179577] LR is at usb_put_function+0x14/0x1c .... BUG=chrome-os-partner:49140 TEST="setprop sys.usb.config accessory,audio_source" on A44 and then switch back to default: "setprop sys.usb.config mtp,adb", no crash will be seen. Change-Id: I5b6141964aab861e86e3afb139ded02d4d122dab Signed-off-by: Mark Kuo Reviewed-on: https://chromium-review.googlesource.com/321013 Commit-Ready: Andrew Bresticker Tested-by: Andrew Bresticker Reviewed-by: Andrew Bresticker --- drivers/usb/gadget/function/f_audio_source.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c index 39645be93502..bcd817439dbf 100644 --- a/drivers/usb/gadget/function/f_audio_source.c +++ b/drivers/usb/gadget/function/f_audio_source.c @@ -583,6 +583,11 @@ static void audio_disable(struct usb_function *f) usb_ep_disable(audio->in_ep); } +static void audio_free_func(struct usb_function *f) +{ + /* no-op */ +} + /*-------------------------------------------------------------------------*/ static void audio_build_desc(struct audio_dev *audio) @@ -827,6 +832,7 @@ static struct audio_dev _audio_dev = { .set_alt = audio_set_alt, .setup = audio_setup, .disable = audio_disable, + .free_func = audio_free_func, }, .lock = __SPIN_LOCK_UNLOCKED(_audio_dev.lock), .idle_reqs = LIST_HEAD_INIT(_audio_dev.idle_reqs), -- GitLab From 90f58eabe7060322c01d1d4f4858209559ad7d0c Mon Sep 17 00:00:00 2001 From: Mark Kuo Date: Mon, 11 Jan 2016 19:07:12 +0800 Subject: [PATCH 0983/1442] CHROMIUM: usb: gadget: f_accessory: add .raw_request callback After this upstream commit: 3c86726cfe38952f0366f86acfbbb025813ec1c2, .raw_request is mandatory in hid_ll_driver structure, hence add an empty raw_request() function. BUG=chrome-os-partner:49140 TEST=none Change-Id: Idd0bbe6960aad2c557376e4a24827d7e1df8e023 Signed-off-by: Mark Kuo Reviewed-on: https://chromium-review.googlesource.com/321038 Commit-Ready: Andrew Bresticker Tested-by: Andrew Bresticker Reviewed-by: Andrew Bresticker --- drivers/usb/gadget/function/f_accessory.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index 1be93a7ca4a1..c62123560143 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -404,12 +404,19 @@ static void acc_hid_close(struct hid_device *hid) { } +static int acc_hid_raw_request(struct hid_device *hid, unsigned char reportnum, + __u8 *buf, size_t len, unsigned char rtype, int reqtype) +{ + return 0; +} + static struct hid_ll_driver acc_hid_ll_driver = { .parse = acc_hid_parse, .start = acc_hid_start, .stop = acc_hid_stop, .open = acc_hid_open, .close = acc_hid_close, + .raw_request = acc_hid_raw_request, }; static struct acc_hid_dev *acc_hid_new(struct acc_dev *dev, -- GitLab From 38333b044db66750ebc3f25494add889aee6dc82 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 6 Oct 2015 20:32:01 -0700 Subject: [PATCH 0984/1442] ANDROID: usb: gadget: Add support for MTP OS desc Windows requires OS specific descriptors for automatic install of drivers for MTP devices. https://msdn.microsoft.com/en-us/library/windows/ hardware/gg463179.aspx BUG=24583401 BUG=chrome-os-partner:43409 Change-Id: I9397072ca3d183efbc9571c6cde3790f10d8851e Signed-off-by: Badhri Jagan Sridharan Reviewed-on: https://chromium-review.googlesource.com/304346 Commit-Ready: Andrew Bresticker Tested-by: Andrew Bresticker Reviewed-by: Andrew Bresticker Signed-off-by: Amit Pundir --- drivers/usb/gadget/function/f_mtp.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index 7f5c390885fe..e21d3e05a4af 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -346,6 +346,8 @@ struct mtp_instance { struct usb_function_instance func_inst; const char *name; struct mtp_dev *dev; + char mtp_ext_compat_id[16]; + struct usb_os_desc mtp_os_desc; }; /* temporary variable used between mtp_open() and mtp_gadget_bind() */ @@ -1157,6 +1159,7 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f) struct mtp_dev *dev = func_to_mtp(f); int id; int ret; + struct mtp_instance *fi_mtp; dev->cdev = cdev; DBG(cdev, "mtp_function_bind dev: %p\n", dev); @@ -1174,6 +1177,18 @@ mtp_function_bind(struct usb_configuration *c, struct usb_function *f) mtp_string_defs[INTERFACE_STRING_INDEX].id = ret; mtp_interface_desc.iInterface = ret; } + + fi_mtp = container_of(f->fi, struct mtp_instance, func_inst); + + if (cdev->use_os_string) { + f->os_desc_table = kzalloc(sizeof(*f->os_desc_table), + GFP_KERNEL); + if (!f->os_desc_table) + return -ENOMEM; + f->os_desc_n = 1; + f->os_desc_table[0].os_desc = &fi_mtp->mtp_os_desc; + } + /* allocate endpoints */ ret = mtp_create_bulk_endpoints(dev, &mtp_fullspeed_in_desc, &mtp_fullspeed_out_desc, &mtp_intr_desc); @@ -1223,6 +1238,8 @@ mtp_function_unbind(struct usb_configuration *c, struct usb_function *f) while ((req = mtp_req_get(dev, &dev->intr_idle))) mtp_request_free(req, dev->ep_intr); dev->state = STATE_OFFLINE; + kfree(f->os_desc_table); + f->os_desc_n = 0; } static int mtp_function_set_alt(struct usb_function *f, @@ -1413,6 +1430,8 @@ struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config) { struct mtp_instance *fi_mtp; int ret = 0; + struct usb_os_desc *descs[1]; + char *names[1]; fi_mtp = kzalloc(sizeof(*fi_mtp), GFP_KERNEL); if (!fi_mtp) @@ -1420,6 +1439,11 @@ struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config) fi_mtp->func_inst.set_inst_name = mtp_set_inst_name; fi_mtp->func_inst.free_func_inst = mtp_free_inst; + fi_mtp->mtp_os_desc.ext_compat_id = fi_mtp->mtp_ext_compat_id; + INIT_LIST_HEAD(&fi_mtp->mtp_os_desc.ext_prop); + descs[0] = &fi_mtp->mtp_os_desc; + names[0] = "MTP"; + if (mtp_config) { ret = mtp_setup_configfs(fi_mtp); if (ret) { @@ -1432,6 +1456,8 @@ struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config) config_group_init_type_name(&fi_mtp->func_inst.group, "", &mtp_func_type); + usb_os_desc_prepare_interf_dir(&fi_mtp->func_inst.group, 1, + descs, names, THIS_MODULE); return &fi_mtp->func_inst; } -- GitLab From 35c9e242a556b41d276f218a768be25b4140e845 Mon Sep 17 00:00:00 2001 From: Daniel Campello Date: Mon, 20 Jul 2015 16:23:50 -0700 Subject: [PATCH 0985/1442] ANDROID: Included sdcardfs source code for kernel 3.0 Only included the source code as is for kernel 3.0. Following patches take care of porting this file system to version 3.10. Change-Id: I09e76db77cd98a059053ba5b6fd88572a4b75b5b Signed-off-by: Daniel Campello --- fs/Kconfig | 1 + fs/Makefile | 5 +- fs/sdcardfs/Kconfig | 18 + fs/sdcardfs/Makefile | 7 + fs/sdcardfs/dentry.c | 182 ++++++++ fs/sdcardfs/derived_perm.c | 290 ++++++++++++ fs/sdcardfs/file.c | 357 +++++++++++++++ fs/sdcardfs/hashtable.h | 190 ++++++++ fs/sdcardfs/inode.c | 886 +++++++++++++++++++++++++++++++++++++ fs/sdcardfs/lookup.c | 386 ++++++++++++++++ fs/sdcardfs/main.c | 425 ++++++++++++++++++ fs/sdcardfs/mmap.c | 82 ++++ fs/sdcardfs/multiuser.h | 37 ++ fs/sdcardfs/packagelist.c | 458 +++++++++++++++++++ fs/sdcardfs/sdcardfs.h | 493 +++++++++++++++++++++ fs/sdcardfs/strtok.h | 75 ++++ fs/sdcardfs/super.c | 229 ++++++++++ include/linux/namei.h | 3 + include/uapi/linux/magic.h | 2 + 19 files changed, 4124 insertions(+), 2 deletions(-) create mode 100644 fs/sdcardfs/Kconfig create mode 100644 fs/sdcardfs/Makefile create mode 100644 fs/sdcardfs/dentry.c create mode 100644 fs/sdcardfs/derived_perm.c create mode 100644 fs/sdcardfs/file.c create mode 100644 fs/sdcardfs/hashtable.h create mode 100644 fs/sdcardfs/inode.c create mode 100644 fs/sdcardfs/lookup.c create mode 100644 fs/sdcardfs/main.c create mode 100644 fs/sdcardfs/mmap.c create mode 100644 fs/sdcardfs/multiuser.h create mode 100644 fs/sdcardfs/packagelist.c create mode 100644 fs/sdcardfs/sdcardfs.h create mode 100644 fs/sdcardfs/strtok.h create mode 100644 fs/sdcardfs/super.c diff --git a/fs/Kconfig b/fs/Kconfig index 4bd03a2b0518..20a8d95b8b3d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -227,6 +227,7 @@ source "fs/orangefs/Kconfig" source "fs/adfs/Kconfig" source "fs/affs/Kconfig" source "fs/ecryptfs/Kconfig" +source "fs/sdcardfs/Kconfig" source "fs/hfs/Kconfig" source "fs/hfsplus/Kconfig" source "fs/befs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index ed2b63257ba9..f207d4303052 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -3,7 +3,7 @@ # # 14 Sep 2000, Christoph Hellwig # Rewritten to use lists instead of if-statements. -# +# obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ @@ -61,7 +61,7 @@ obj-y += devpts/ obj-$(CONFIG_PROFILING) += dcookies.o obj-$(CONFIG_DLM) += dlm/ - + # Do not add any filesystems before this line obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ @@ -83,6 +83,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ obj-$(CONFIG_HFS_FS) += hfs/ obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ +obj-$(CONFIG_SDCARD_FS) += sdcardfs/ obj-$(CONFIG_VXFS_FS) += freevxfs/ obj-$(CONFIG_NFS_FS) += nfs/ obj-$(CONFIG_EXPORTFS) += exportfs/ diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig new file mode 100644 index 000000000000..657f4958e8d6 --- /dev/null +++ b/fs/sdcardfs/Kconfig @@ -0,0 +1,18 @@ +config SDCARD_FS + tristate "sdcard file system" + depends on EXPERIMENTAL + default n + help + Sdcardfs is based on Wrapfs file system. + +config SDCARD_FS_FADV_NOACTIVE + bool "sdcardfs fadvise noactive support" + depends on FADV_NOACTIVE + default y + help + Sdcardfs supports fadvise noactive mode. + +config SDCARD_FS_CI_SEARCH + tristate "sdcardfs case-insensitive search support" + depends on SDCARD_FS + default y diff --git a/fs/sdcardfs/Makefile b/fs/sdcardfs/Makefile new file mode 100644 index 000000000000..b84fbb2b45a4 --- /dev/null +++ b/fs/sdcardfs/Makefile @@ -0,0 +1,7 @@ +SDCARDFS_VERSION="0.1" + +EXTRA_CFLAGS += -DSDCARDFS_VERSION=\"$(SDCARDFS_VERSION)\" + +obj-$(CONFIG_SDCARD_FS) += sdcardfs.o + +sdcardfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o packagelist.o derived_perm.o diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c new file mode 100644 index 000000000000..4572a5403bb2 --- /dev/null +++ b/fs/sdcardfs/dentry.c @@ -0,0 +1,182 @@ +/* + * fs/sdcardfs/dentry.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include "linux/ctype.h" + +/* + * returns: -ERRNO if error (returned to user) + * 0: tell VFS to invalidate dentry + * 1: dentry is valid + */ +static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + int err = 1; + struct path parent_lower_path, lower_path; + struct dentry *parent_dentry = NULL; + struct dentry *parent_lower_dentry = NULL; + struct dentry *lower_cur_parent_dentry = NULL; + struct dentry *lower_dentry = NULL; + + if (nd && nd->flags & LOOKUP_RCU) + return -ECHILD; + + spin_lock(&dentry->d_lock); + if (IS_ROOT(dentry)) { + spin_unlock(&dentry->d_lock); + return 1; + } + spin_unlock(&dentry->d_lock); + + /* check uninitialized obb_dentry and + * whether the base obbpath has been changed or not */ + if (is_obbpath_invalid(dentry)) { + d_drop(dentry); + return 0; + } + + parent_dentry = dget_parent(dentry); + sdcardfs_get_lower_path(parent_dentry, &parent_lower_path); + sdcardfs_get_real_lower(dentry, &lower_path); + parent_lower_dentry = parent_lower_path.dentry; + lower_dentry = lower_path.dentry; + lower_cur_parent_dentry = dget_parent(lower_dentry); + + spin_lock(&lower_dentry->d_lock); + if (d_unhashed(lower_dentry)) { + spin_unlock(&lower_dentry->d_lock); + d_drop(dentry); + err = 0; + goto out; + } + spin_unlock(&lower_dentry->d_lock); + + if (parent_lower_dentry != lower_cur_parent_dentry) { + d_drop(dentry); + err = 0; + goto out; + } + + if (dentry < lower_dentry) { + spin_lock(&dentry->d_lock); + spin_lock(&lower_dentry->d_lock); + } else { + spin_lock(&lower_dentry->d_lock); + spin_lock(&dentry->d_lock); + } + + if (dentry->d_name.len != lower_dentry->d_name.len) { + __d_drop(dentry); + err = 0; + } else if (strncasecmp(dentry->d_name.name, lower_dentry->d_name.name, + dentry->d_name.len) != 0) { + __d_drop(dentry); + err = 0; + } + + if (dentry < lower_dentry) { + spin_unlock(&lower_dentry->d_lock); + spin_unlock(&dentry->d_lock); + } else { + spin_unlock(&dentry->d_lock); + spin_unlock(&lower_dentry->d_lock); + } + +out: + dput(parent_dentry); + dput(lower_cur_parent_dentry); + sdcardfs_put_lower_path(parent_dentry, &parent_lower_path); + sdcardfs_put_real_lower(dentry, &lower_path); + return err; +} + +static void sdcardfs_d_release(struct dentry *dentry) +{ + /* release and reset the lower paths */ + if(has_graft_path(dentry)) { + sdcardfs_put_reset_orig_path(dentry); + } + sdcardfs_put_reset_lower_path(dentry); + free_dentry_private_data(dentry); + return; +} + +static int sdcardfs_hash_ci(const struct dentry *dentry, + const struct inode *inode, struct qstr *qstr) +{ + /* + * This function is copy of vfat_hashi. + * FIXME Should we support national language? + * Refer to vfat_hashi() + * struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; + */ + const unsigned char *name; + unsigned int len; + unsigned long hash; + + name = qstr->name; + //len = vfat_striptail_len(qstr); + len = qstr->len; + + hash = init_name_hash(); + while (len--) + //hash = partial_name_hash(nls_tolower(t, *name++), hash); + hash = partial_name_hash(tolower(*name++), hash); + qstr->hash = end_name_hash(hash); + + return 0; +} + +/* + * Case insensitive compare of two vfat names. + */ +static int sdcardfs_cmp_ci(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) +{ + /* This function is copy of vfat_cmpi */ + // FIXME Should we support national language? + //struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; + //unsigned int alen, blen; + + /* A filename cannot end in '.' or we treat it like it has none */ + /* + alen = vfat_striptail_len(name); + blen = __vfat_striptail_len(len, str); + if (alen == blen) { + if (nls_strnicmp(t, name->name, str, alen) == 0) + return 0; + } + */ + if (name->len == len) { + if (strncasecmp(name->name, str, len) == 0) + return 0; + } + return 1; +} + +const struct dentry_operations sdcardfs_ci_dops = { + .d_revalidate = sdcardfs_d_revalidate, + .d_release = sdcardfs_d_release, + .d_hash = sdcardfs_hash_ci, + .d_compare = sdcardfs_cmp_ci, +}; + diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c new file mode 100644 index 000000000000..00c33a471dcc --- /dev/null +++ b/fs/sdcardfs/derived_perm.c @@ -0,0 +1,290 @@ +/* + * fs/sdcardfs/derived_perm.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +/* copy derived state from parent inode */ +static void inherit_derived_state(struct inode *parent, struct inode *child) +{ + struct sdcardfs_inode_info *pi = SDCARDFS_I(parent); + struct sdcardfs_inode_info *ci = SDCARDFS_I(child); + + ci->perm = PERM_INHERIT; + ci->userid = pi->userid; + ci->d_uid = pi->d_uid; + ci->d_gid = pi->d_gid; + ci->d_mode = pi->d_mode; +} + +/* helper function for derived state */ +void setup_derived_state(struct inode *inode, perm_t perm, + userid_t userid, uid_t uid, gid_t gid, mode_t mode) +{ + struct sdcardfs_inode_info *info = SDCARDFS_I(inode); + + info->perm = perm; + info->userid = userid; + info->d_uid = uid; + info->d_gid = gid; + info->d_mode = mode; +} + +void get_derived_permission(struct dentry *parent, struct dentry *dentry) +{ + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + appid_t appid; + + /* By default, each inode inherits from its parent. + * the properties are maintained on its private fields + * because the inode attributes will be modified with that of + * its lower inode. + * The derived state will be updated on the last + * stage of each system call by fix_derived_permission(inode). + */ + + inherit_derived_state(parent->d_inode, dentry->d_inode); + + //printk(KERN_INFO "sdcardfs: derived: %s, %s, %d\n", parent->d_name.name, + // dentry->d_name.name, parent_info->perm); + + if (sbi->options.derive == DERIVE_NONE) { + return; + } + + /* Derive custom permissions based on parent and current node */ + switch (parent_info->perm) { + case PERM_INHERIT: + /* Already inherited above */ + break; + case PERM_LEGACY_PRE_ROOT: + /* Legacy internal layout places users at top level */ + info->perm = PERM_ROOT; + info->userid = simple_strtoul(dentry->d_name.name, NULL, 10); + break; + case PERM_ROOT: + /* Assume masked off by default. */ + info->d_mode = 00770; + if (!strcasecmp(dentry->d_name.name, "Android")) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID; + info->d_mode = 00771; + } else if (sbi->options.split_perms) { + if (!strcasecmp(dentry->d_name.name, "DCIM") + || !strcasecmp(dentry->d_name.name, "Pictures")) { + info->d_gid = AID_SDCARD_PICS; + } else if (!strcasecmp(dentry->d_name.name, "Alarms") + || !strcasecmp(dentry->d_name.name, "Movies") + || !strcasecmp(dentry->d_name.name, "Music") + || !strcasecmp(dentry->d_name.name, "Notifications") + || !strcasecmp(dentry->d_name.name, "Podcasts") + || !strcasecmp(dentry->d_name.name, "Ringtones")) { + info->d_gid = AID_SDCARD_AV; + } + } + break; + case PERM_ANDROID: + if (!strcasecmp(dentry->d_name.name, "data")) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_DATA; + info->d_mode = 00771; + } else if (!strcasecmp(dentry->d_name.name, "obb")) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_OBB; + info->d_mode = 00771; + // FIXME : this feature will be implemented later. + /* Single OBB directory is always shared */ + } else if (!strcasecmp(dentry->d_name.name, "user")) { + /* User directories must only be accessible to system, protected + * by sdcard_all. Zygote will bind mount the appropriate user- + * specific path. */ + info->perm = PERM_ANDROID_USER; + info->d_gid = AID_SDCARD_ALL; + info->d_mode = 00770; + } + break; + /* same policy will be applied on PERM_ANDROID_DATA + * and PERM_ANDROID_OBB */ + case PERM_ANDROID_DATA: + case PERM_ANDROID_OBB: + appid = get_appid(sbi->pkgl_id, dentry->d_name.name); + if (appid != 0) { + info->d_uid = multiuser_get_uid(parent_info->userid, appid); + } + info->d_mode = 00770; + break; + case PERM_ANDROID_USER: + /* Root of a secondary user */ + info->perm = PERM_ROOT; + info->userid = simple_strtoul(dentry->d_name.name, NULL, 10); + info->d_gid = AID_SDCARD_R; + info->d_mode = 00771; + break; + } +} + +/* main function for updating derived permission */ +inline void update_derived_permission(struct dentry *dentry) +{ + struct dentry *parent; + + if(!dentry || !dentry->d_inode) { + printk(KERN_ERR "sdcardfs: %s: invalid dentry\n", __func__); + return; + } + /* FIXME: + * 1. need to check whether the dentry is updated or not + * 2. remove the root dentry update + */ + if(IS_ROOT(dentry)) { + //setup_default_pre_root_state(dentry->d_inode); + } else { + parent = dget_parent(dentry); + if(parent) { + get_derived_permission(parent, dentry); + dput(parent); + } + } + fix_derived_permission(dentry->d_inode); +} + +int need_graft_path(struct dentry *dentry) +{ + int ret = 0; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + if(parent_info->perm == PERM_ANDROID && + !strcasecmp(dentry->d_name.name, "obb")) { + + /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ + if(!(sbi->options.derive == DERIVE_UNIFIED + && parent_info->userid == 0)) { + ret = 1; + } + } + dput(parent); + return ret; +} + +int is_obbpath_invalid(struct dentry *dent) +{ + int ret = 0; + struct sdcardfs_dentry_info *di = SDCARDFS_D(dent); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb); + char *path_buf, *obbpath_s; + + /* check the base obbpath has been changed. + * this routine can check an uninitialized obb dentry as well. + * regarding the uninitialized obb, refer to the sdcardfs_mkdir() */ + spin_lock(&di->lock); + if(di->orig_path.dentry) { + if(!di->lower_path.dentry) { + ret = 1; + } else { + path_get(&di->lower_path); + //lower_parent = lock_parent(lower_path->dentry); + + path_buf = kmalloc(PATH_MAX, GFP_ATOMIC); + if(!path_buf) { + ret = 1; + printk(KERN_ERR "sdcardfs: " + "fail to allocate path_buf in %s.\n", __func__); + } else { + obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX); + if (d_unhashed(di->lower_path.dentry) || + strcasecmp(sbi->obbpath_s, obbpath_s)) { + ret = 1; + } + kfree(path_buf); + } + + //unlock_dir(lower_parent); + path_put(&di->lower_path); + } + } + spin_unlock(&di->lock); + return ret; +} + +int is_base_obbpath(struct dentry *dentry) +{ + int ret = 0; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + spin_lock(&SDCARDFS_D(dentry)->lock); + /* DERIVED_LEGACY */ + if(parent_info->perm == PERM_LEGACY_PRE_ROOT && + !strcasecmp(dentry->d_name.name, "obb")) { + ret = 1; + } + /* DERIVED_UNIFIED :/Android/obb is the base obbpath */ + else if (parent_info->perm == PERM_ANDROID && + !strcasecmp(dentry->d_name.name, "obb")) { + if((sbi->options.derive == DERIVE_UNIFIED + && parent_info->userid == 0)) { + ret = 1; + } + } + spin_unlock(&SDCARDFS_D(dentry)->lock); + dput(parent); + return ret; +} + +/* The lower_path will be stored to the dentry's orig_path + * and the base obbpath will be copyed to the lower_path variable. + * if an error returned, there's no change in the lower_path + * returns: -ERRNO if error (0: no error) */ +int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) +{ + int err = 0; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct path obbpath; + + /* A local obb dentry must have its own orig_path to support rmdir + * and mkdir of itself. Usually, we expect that the sbi->obbpath + * is avaiable on this stage. */ + sdcardfs_set_orig_path(dentry, lower_path); + + err = kern_path(sbi->obbpath_s, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath); + + if(!err) { + /* the obbpath base has been found */ + printk(KERN_INFO "sdcardfs: " + "the sbi->obbpath is found\n"); + pathcpy(lower_path, &obbpath); + } else { + /* if the sbi->obbpath is not available, we can optionally + * setup the lower_path with its orig_path. + * but, the current implementation just returns an error + * because the sdcard daemon also regards this case as + * a lookup fail. */ + printk(KERN_INFO "sdcardfs: " + "the sbi->obbpath is not available\n"); + } + return err; +} + + diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c new file mode 100644 index 000000000000..bcacb947c874 --- /dev/null +++ b/fs/sdcardfs/file.c @@ -0,0 +1,357 @@ +/* + * fs/sdcardfs/file.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE +#include +#endif + +static ssize_t sdcardfs_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int err; + struct file *lower_file; + struct dentry *dentry = file->f_path.dentry; +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE + struct backing_dev_info *bdi; +#endif + + lower_file = sdcardfs_lower_file(file); + +#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE + if (file->f_mode & FMODE_NOACTIVE) { + if (!(lower_file->f_mode & FMODE_NOACTIVE)) { + bdi = lower_file->f_mapping->backing_dev_info; + lower_file->f_ra.ra_pages = bdi->ra_pages * 2; + spin_lock(&lower_file->f_lock); + lower_file->f_mode |= FMODE_NOACTIVE; + spin_unlock(&lower_file->f_lock); + } + } +#endif + + err = vfs_read(lower_file, buf, count, ppos); + /* update our inode atime upon a successful lower read */ + if (err >= 0) + fsstack_copy_attr_atime(dentry->d_inode, + lower_file->f_path.dentry->d_inode); + + return err; +} + +static ssize_t sdcardfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int err = 0; + struct file *lower_file; + struct dentry *dentry = file->f_path.dentry; + + /* check disk space */ + if (!check_min_free_space(dentry, count, 0)) { + printk(KERN_INFO "No minimum free space.\n"); + return -ENOSPC; + } + + lower_file = sdcardfs_lower_file(file); + err = vfs_write(lower_file, buf, count, ppos); + /* update our inode times+sizes upon a successful lower write */ + if (err >= 0) { + fsstack_copy_inode_size(dentry->d_inode, + lower_file->f_path.dentry->d_inode); + fsstack_copy_attr_times(dentry->d_inode, + lower_file->f_path.dentry->d_inode); + } + + return err; +} + +static int sdcardfs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int err = 0; + struct file *lower_file = NULL; + struct dentry *dentry = file->f_path.dentry; + + lower_file = sdcardfs_lower_file(file); + + lower_file->f_pos = file->f_pos; + err = vfs_readdir(lower_file, filldir, dirent); + file->f_pos = lower_file->f_pos; + if (err >= 0) /* copy the atime */ + fsstack_copy_attr_atime(dentry->d_inode, + lower_file->f_path.dentry->d_inode); + return err; +} + +static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long err = -ENOTTY; + struct file *lower_file; + + lower_file = sdcardfs_lower_file(file); + + /* XXX: use vfs_ioctl if/when VFS exports it */ + if (!lower_file || !lower_file->f_op) + goto out; + if (lower_file->f_op->unlocked_ioctl) + err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); + +out: + return err; +} + +#ifdef CONFIG_COMPAT +static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long err = -ENOTTY; + struct file *lower_file; + + lower_file = sdcardfs_lower_file(file); + + /* XXX: use vfs_ioctl if/when VFS exports it */ + if (!lower_file || !lower_file->f_op) + goto out; + if (lower_file->f_op->compat_ioctl) + err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); + +out: + return err; +} +#endif + +static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err = 0; + bool willwrite; + struct file *lower_file; + const struct vm_operations_struct *saved_vm_ops = NULL; + + /* this might be deferred to mmap's writepage */ + willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); + + /* + * File systems which do not implement ->writepage may use + * generic_file_readonly_mmap as their ->mmap op. If you call + * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. + * But we cannot call the lower ->mmap op, so we can't tell that + * writeable mappings won't work. Therefore, our only choice is to + * check if the lower file system supports the ->writepage, and if + * not, return EINVAL (the same error that + * generic_file_readonly_mmap returns in that case). + */ + lower_file = sdcardfs_lower_file(file); + if (willwrite && !lower_file->f_mapping->a_ops->writepage) { + err = -EINVAL; + printk(KERN_ERR "sdcardfs: lower file system does not " + "support writeable mmap\n"); + goto out; + } + + /* + * find and save lower vm_ops. + * + * XXX: the VFS should have a cleaner way of finding the lower vm_ops + */ + if (!SDCARDFS_F(file)->lower_vm_ops) { + err = lower_file->f_op->mmap(lower_file, vma); + if (err) { + printk(KERN_ERR "sdcardfs: lower mmap failed %d\n", err); + goto out; + } + saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */ + err = do_munmap(current->mm, vma->vm_start, + vma->vm_end - vma->vm_start); + if (err) { + printk(KERN_ERR "sdcardfs: do_munmap failed %d\n", err); + goto out; + } + } + + /* + * Next 3 lines are all I need from generic_file_mmap. I definitely + * don't want its test for ->readpage which returns -ENOEXEC. + */ + file_accessed(file); + vma->vm_ops = &sdcardfs_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + + file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */ + if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */ + SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops; + +out: + return err; +} + +static int sdcardfs_open(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lower_path; + struct dentry *dentry = file->f_path.dentry; + struct dentry *parent = dget_parent(dentry); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + int has_rw; + + /* don't open unhashed/deleted files */ + if (d_unhashed(dentry)) { + err = -ENOENT; + goto out_err; + } + + has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + sbi->options.derive, + open_flags_to_access_mode(file->f_flags), has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_err; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(sbi, saved_cred); + + file->private_data = + kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL); + if (!SDCARDFS_F(file)) { + err = -ENOMEM; + goto out_revert_cred; + } + + /* open lower object and link sdcardfs's file struct to lower's */ + sdcardfs_get_lower_path(file->f_path.dentry, &lower_path); + lower_file = dentry_open(lower_path.dentry, lower_path.mnt, + file->f_flags, current_cred()); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + lower_file = sdcardfs_lower_file(file); + if (lower_file) { + sdcardfs_set_lower_file(file, NULL); + fput(lower_file); /* fput calls dput for lower_dentry */ + } + } else { + sdcardfs_set_lower_file(file, lower_file); + } + + if (err) + kfree(SDCARDFS_F(file)); + else { + fsstack_copy_attr_all(inode, sdcardfs_lower_inode(inode)); + fix_derived_permission(inode); + } + +out_revert_cred: + REVERT_CRED(saved_cred); +out_err: + dput(parent); + return err; +} + +static int sdcardfs_flush(struct file *file, fl_owner_t id) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sdcardfs_lower_file(file); + if (lower_file && lower_file->f_op && lower_file->f_op->flush) + err = lower_file->f_op->flush(lower_file, id); + + return err; +} + +/* release all lower object references & free the file info structure */ +static int sdcardfs_file_release(struct inode *inode, struct file *file) +{ + struct file *lower_file; + + lower_file = sdcardfs_lower_file(file); + if (lower_file) { + sdcardfs_set_lower_file(file, NULL); + fput(lower_file); + } + + kfree(SDCARDFS_F(file)); + return 0; +} + +static int +sdcardfs_fsync(struct file *file, int datasync) +{ + int err; + struct file *lower_file; + struct path lower_path; + struct dentry *dentry = file->f_path.dentry; + + lower_file = sdcardfs_lower_file(file); + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_fsync(lower_file, datasync); + sdcardfs_put_lower_path(dentry, &lower_path); + + return err; +} + +static int sdcardfs_fasync(int fd, struct file *file, int flag) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sdcardfs_lower_file(file); + if (lower_file->f_op && lower_file->f_op->fasync) + err = lower_file->f_op->fasync(fd, lower_file, flag); + + return err; +} + +const struct file_operations sdcardfs_main_fops = { + .llseek = generic_file_llseek, + .read = sdcardfs_read, + .write = sdcardfs_write, + .unlocked_ioctl = sdcardfs_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sdcardfs_compat_ioctl, +#endif + .mmap = sdcardfs_mmap, + .open = sdcardfs_open, + .flush = sdcardfs_flush, + .release = sdcardfs_file_release, + .fsync = sdcardfs_fsync, + .fasync = sdcardfs_fasync, +}; + +/* trimmed directory options */ +const struct file_operations sdcardfs_dir_fops = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = sdcardfs_readdir, + .unlocked_ioctl = sdcardfs_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sdcardfs_compat_ioctl, +#endif + .open = sdcardfs_open, + .release = sdcardfs_file_release, + .flush = sdcardfs_flush, + .fsync = sdcardfs_fsync, + .fasync = sdcardfs_fasync, +}; diff --git a/fs/sdcardfs/hashtable.h b/fs/sdcardfs/hashtable.h new file mode 100644 index 000000000000..1e770f3df148 --- /dev/null +++ b/fs/sdcardfs/hashtable.h @@ -0,0 +1,190 @@ +/* + * Statically sized hash table implementation + * (C) 2012 Sasha Levin + */ + +#ifndef _LINUX_HASHTABLE_H +#define _LINUX_HASHTABLE_H + +#include +#include +#include +#include +#include + +#define DEFINE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] = \ + { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + +#define DECLARE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] + +#define HASH_SIZE(name) (ARRAY_SIZE(name)) +#define HASH_BITS(name) ilog2(HASH_SIZE(name)) + +/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ +#define hash_min(val, bits) \ + (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) + +static inline void __hash_init(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + INIT_HLIST_HEAD(&ht[i]); +} + +/** + * hash_init - initialize a hash table + * @hashtable: hashtable to be initialized + * + * Calculates the size of the hashtable from the given parameter, otherwise + * same as hash_init_size. + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_add - add an object to a hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add(hashtable, node, key) \ + hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_add_rcu - add an object to a rcu enabled hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add_rcu(hashtable, node, key) \ + hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_hashed - check whether an object is in any hashtable + * @node: the &struct hlist_node of the object to be checked + */ +static inline bool hash_hashed(struct hlist_node *node) +{ + return !hlist_unhashed(node); +} + +static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + if (!hlist_empty(&ht[i])) + return false; + + return true; +} + +/** + * hash_empty - check whether a hashtable is empty + * @hashtable: hashtable to check + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_del - remove an object from a hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del(struct hlist_node *node) +{ + hlist_del_init(node); +} + +/** + * hash_del_rcu - remove an object from a rcu enabled hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del_rcu(struct hlist_node *node) +{ + hlist_del_init_rcu(node); +} + +/** + * hash_for_each - iterate over a hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each(name, bkt, obj, member, pos) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry(obj, pos, &name[bkt], member) + +/** + * hash_for_each_rcu - iterate over a rcu enabled hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_rcu(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_rcu(obj, &name[bkt], member) + +/** + * hash_for_each_safe - iterate over a hashtable safe against removal of + * hash entry + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @tmp: a &struct used for temporary storage + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_safe(name, bkt, tmp, obj, member, pos) \ + for ((bkt) = 0, obj = NULL; (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_safe(obj, pos, tmp, &name[bkt], member) + +/** + * hash_for_each_possible - iterate over all possible objects hashing to the + * same bucket + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible(name, obj, member, key, pos) \ + hlist_for_each_entry(obj, pos, &name[hash_min(key, HASH_BITS(name))], member) + +/** + * hash_for_each_possible_rcu - iterate over all possible objects hashing to the + * same bucket in an rcu enabled hashtable + * in a rcu enabled hashtable + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_rcu(name, obj, member, key) \ + hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ + member) + +/** + * hash_for_each_possible_safe - iterate over all possible objects hashing to the + * same bucket safe against removals + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @tmp: a &struct used for temporary storage + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ + hlist_for_each_entry_safe(obj, tmp,\ + &name[hash_min(key, HASH_BITS(name))], member) + + +#endif diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c new file mode 100644 index 000000000000..e8ed04250ed1 --- /dev/null +++ b/fs/sdcardfs/inode.c @@ -0,0 +1,886 @@ +/* + * fs/sdcardfs/inode.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +/* Do not directly use this function. Use OVERRIDE_CRED() instead. */ +const struct cred * override_fsids(struct sdcardfs_sb_info* sbi) +{ + struct cred * cred; + const struct cred * old_cred; + + cred = prepare_creds(); + if (!cred) + return NULL; + + cred->fsuid = sbi->options.fs_low_uid; + cred->fsgid = sbi->options.fs_low_gid; + + old_cred = override_creds(cred); + + return old_cred; +} + +/* Do not directly use this function, use REVERT_CRED() instead. */ +void revert_fsids(const struct cred * old_cred) +{ + const struct cred * cur_cred; + + cur_cred = current->cred; + revert_creds(old_cred); + put_cred(cur_cred); +} + +static int sdcardfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + int err = 0; + struct dentry *lower_dentry; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path, saved_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + + pathcpy(&saved_path, &nd->path); + pathcpy(&nd->path, &lower_path); + + /* set last 16bytes of mode field to 0664 */ + mode = (mode & S_IFMT) | 00664; + err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, nd); + + pathcpy(&nd->path, &saved_path); + if (err) + goto out; + + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + if (err) + goto out; + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + struct dentry *lower_old_dentry; + struct dentry *lower_new_dentry; + struct dentry *lower_dir_dentry; + u64 file_size_save; + int err; + struct path lower_old_path, lower_new_path; + + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); + + file_size_save = i_size_read(old_dentry->d_inode); + sdcardfs_get_lower_path(old_dentry, &lower_old_path); + sdcardfs_get_lower_path(new_dentry, &lower_new_path); + lower_old_dentry = lower_old_path.dentry; + lower_new_dentry = lower_new_path.dentry; + lower_dir_dentry = lock_parent(lower_new_dentry); + + err = mnt_want_write(lower_new_path.mnt); + if (err) + goto out_unlock; + + err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, + lower_new_dentry); + if (err || !lower_new_dentry->d_inode) + goto out; + + err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path); + if (err) + goto out; + fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); + fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); + old_dentry->d_inode->i_nlink = + sdcardfs_lower_inode(old_dentry->d_inode)->i_nlink; + i_size_write(new_dentry->d_inode, file_size_save); +out: + mnt_drop_write(lower_new_path.mnt); +out_unlock: + unlock_dir(lower_dir_dentry); + sdcardfs_put_lower_path(old_dentry, &lower_old_path); + sdcardfs_put_lower_path(new_dentry, &lower_new_path); + REVERT_CRED(); + return err; +} +#endif + +static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int err; + struct dentry *lower_dentry; + struct inode *lower_dir_inode = sdcardfs_lower_inode(dir); + struct dentry *lower_dir_dentry; + struct path lower_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + err = vfs_unlink(lower_dir_inode, lower_dentry); + + /* + * Note: unlinking on top of NFS can cause silly-renamed files. + * Trying to delete such files results in EBUSY from NFS + * below. Silly-renamed files will get deleted by NFS later on, so + * we just need to detect them here and treat such EBUSY errors as + * if the upper file was successfully deleted. + */ + if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED) + err = 0; + if (err) + goto out; + fsstack_copy_attr_times(dir, lower_dir_inode); + fsstack_copy_inode_size(dir, lower_dir_inode); + dentry->d_inode->i_nlink = + sdcardfs_lower_inode(dentry->d_inode)->i_nlink; + dentry->d_inode->i_ctime = dir->i_ctime; + d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */ +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + int err = 0; + struct dentry *lower_dentry; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path; + + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname); + if (err) + goto out; + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + if (err) + goto out; + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(); + return err; +} +#endif + +static int touch(char *abs_path, mode_t mode) { + struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode); + if (IS_ERR(filp)) { + if (PTR_ERR(filp) == -EEXIST) { + return 0; + } + else { + printk(KERN_ERR "sdcardfs: failed to open(%s): %ld\n", + abs_path, PTR_ERR(filp)); + return PTR_ERR(filp); + } + } + filp_close(filp, current->files); + return 0; +} + +static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int err = 0; + int make_nomedia_in_obb = 0; + struct dentry *lower_dentry; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + struct sdcardfs_inode_info *pi = SDCARDFS_I(dir); + char *page_buf; + char *nomedia_dir_name; + char *nomedia_fullpath; + int fullpath_namelen; + int touch_err = 0; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); + + /* check disk space */ + if (!check_min_free_space(dentry, 0, 1)) { + printk(KERN_INFO "sdcardfs: No minimum free space.\n"); + err = -ENOSPC; + goto out_revert; + } + + /* the lower_dentry is negative here */ + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + + /* set last 16bytes of mode field to 0775 */ + mode = (mode & S_IFMT) | 00775; + err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, mode); + + if (err) + goto out; + + /* if it is a local obb dentry, setup it with the base obbpath */ + if(need_graft_path(dentry)) { + + err = setup_obb_dentry(dentry, &lower_path); + if(err) { + /* if the sbi->obbpath is not available, the lower_path won't be + * changed by setup_obb_dentry() but the lower path is saved to + * its orig_path. this dentry will be revalidated later. + * but now, the lower_path should be NULL */ + sdcardfs_put_reset_lower_path(dentry); + + /* the newly created lower path which saved to its orig_path or + * the lower_path is the base obbpath. + * therefore, an additional path_get is required */ + path_get(&lower_path); + } else + make_nomedia_in_obb = 1; + } + + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + if (err) + goto out; + + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + /* update number of links on parent directory */ + dir->i_nlink = sdcardfs_lower_inode(dir)->i_nlink; + + if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb")) + && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) + make_nomedia_in_obb = 1; + + /* When creating /Android/data and /Android/obb, mark them as .nomedia */ + if (make_nomedia_in_obb || + ((pi->perm == PERM_ANDROID) && (!strcasecmp(dentry->d_name.name, "data")))) { + + page_buf = (char *)__get_free_page(GFP_KERNEL); + if (!page_buf) { + printk(KERN_ERR "sdcardfs: failed to allocate page buf\n"); + goto out; + } + + nomedia_dir_name = d_absolute_path(&lower_path, page_buf, PAGE_SIZE); + if (IS_ERR(nomedia_dir_name)) { + free_page((unsigned long)page_buf); + printk(KERN_ERR "sdcardfs: failed to get .nomedia dir name\n"); + goto out; + } + + fullpath_namelen = page_buf + PAGE_SIZE - nomedia_dir_name - 1; + fullpath_namelen += strlen("/.nomedia"); + nomedia_fullpath = kzalloc(fullpath_namelen + 1, GFP_KERNEL); + if (!nomedia_fullpath) { + free_page((unsigned long)page_buf); + printk(KERN_ERR "sdcardfs: failed to allocate .nomedia fullpath buf\n"); + goto out; + } + + strcpy(nomedia_fullpath, nomedia_dir_name); + free_page((unsigned long)page_buf); + strcat(nomedia_fullpath, "/.nomedia"); + touch_err = touch(nomedia_fullpath, 0664); + if (touch_err) { + printk(KERN_ERR "sdcardfs: failed to touch(%s): %d\n", + nomedia_fullpath, touch_err); + kfree(nomedia_fullpath); + goto out; + } + kfree(nomedia_fullpath); + } +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); +out_revert: + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + int err; + struct path lower_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + //char *path_s = NULL; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); + + /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry + * the dentry on the original path should be deleted. */ + sdcardfs_get_real_lower(dentry, &lower_path); + + lower_dentry = lower_path.dentry; + lower_dir_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); + if (err) + goto out; + + d_drop(dentry); /* drop our dentry on success (why not VFS's job?) */ + if (dentry->d_inode) + clear_nlink(dentry->d_inode); + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); + fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); + dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; + +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_dir_dentry); + sdcardfs_put_real_lower(dentry, &lower_path); + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t dev) +{ + int err = 0; + struct dentry *lower_dentry; + struct dentry *lower_parent_dentry = NULL; + struct path lower_path; + + OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_parent_dentry = lock_parent(lower_dentry); + + err = mnt_want_write(lower_path.mnt); + if (err) + goto out_unlock; + err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev); + if (err) + goto out; + + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + if (err) + goto out; + fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); + fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + +out: + mnt_drop_write(lower_path.mnt); +out_unlock: + unlock_dir(lower_parent_dentry); + sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(); + return err; +} +#endif + +/* + * The locking rules in sdcardfs_rename are complex. We could use a simpler + * superblock-level name-space lock for renames and copy-ups. + */ +static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int err = 0; + struct dentry *lower_old_dentry = NULL; + struct dentry *lower_new_dentry = NULL; + struct dentry *lower_old_dir_dentry = NULL; + struct dentry *lower_new_dir_dentry = NULL; + struct dentry *trap = NULL; + struct dentry *new_parent = NULL; + struct path lower_old_path, lower_new_path; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(old_dentry->d_sb); + const struct cred *saved_cred = NULL; + + int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name, + sbi->options.derive, 1, has_rw) || + !check_caller_access_to_name(new_dir, new_dentry->d_name.name, + sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " new_dentry: %s, task:%s\n", + __func__, new_dentry->d_name.name, current->comm); + err = -EACCES; + goto out_eacces; + } + + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(old_dir->i_sb), saved_cred); + + sdcardfs_get_real_lower(old_dentry, &lower_old_path); + sdcardfs_get_lower_path(new_dentry, &lower_new_path); + lower_old_dentry = lower_old_path.dentry; + lower_new_dentry = lower_new_path.dentry; + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = dget_parent(lower_new_dentry); + + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + /* source should not be ancestor of target */ + if (trap == lower_old_dentry) { + err = -EINVAL; + goto out; + } + /* target should not be ancestor of source */ + if (trap == lower_new_dentry) { + err = -ENOTEMPTY; + goto out; + } + + err = mnt_want_write(lower_old_path.mnt); + if (err) + goto out; + err = mnt_want_write(lower_new_path.mnt); + if (err) + goto out_drop_old_write; + + err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, + lower_new_dir_dentry->d_inode, lower_new_dentry); + if (err) + goto out_err; + + /* Copy attrs from lower dir, but i_uid/i_gid */ + fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); + fsstack_copy_inode_size(new_dir, lower_new_dir_dentry->d_inode); + fix_derived_permission(new_dir); + if (new_dir != old_dir) { + fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); + fsstack_copy_inode_size(old_dir, lower_old_dir_dentry->d_inode); + fix_derived_permission(old_dir); + /* update the derived permission of the old_dentry + * with its new parent + */ + new_parent = dget_parent(new_dentry); + if(new_parent) { + if(old_dentry->d_inode) { + get_derived_permission(new_parent, old_dentry); + fix_derived_permission(old_dentry->d_inode); + } + dput(new_parent); + } + } + +out_err: + mnt_drop_write(lower_new_path.mnt); +out_drop_old_write: + mnt_drop_write(lower_old_path.mnt); +out: + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dput(lower_old_dir_dentry); + dput(lower_new_dir_dentry); + sdcardfs_put_real_lower(old_dentry, &lower_old_path); + sdcardfs_put_lower_path(new_dentry, &lower_new_path); + REVERT_CRED(saved_cred); +out_eacces: + return err; +} + +#if 0 +static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +{ + int err; + struct dentry *lower_dentry; + struct path lower_path; + /* XXX readlink does not requires overriding credential */ + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + if (!lower_dentry->d_inode->i_op || + !lower_dentry->d_inode->i_op->readlink) { + err = -EINVAL; + goto out; + } + + err = lower_dentry->d_inode->i_op->readlink(lower_dentry, + buf, bufsiz); + if (err < 0) + goto out; + fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); + +out: + sdcardfs_put_lower_path(dentry, &lower_path); + return err; +} +#endif + +#if 0 +static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *buf; + int len = PAGE_SIZE, err; + mm_segment_t old_fs; + + /* This is freed by the put_link method assuming a successful call. */ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + buf = ERR_PTR(-ENOMEM); + goto out; + } + + /* read the symlink, and then we will follow it */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sdcardfs_readlink(dentry, buf, len); + set_fs(old_fs); + if (err < 0) { + kfree(buf); + buf = ERR_PTR(err); + } else { + buf[err] = '\0'; + } +out: + nd_set_link(nd, buf); + return NULL; +} +#endif + +#if 0 +/* this @nd *IS* still used */ +static void sdcardfs_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *buf = nd_get_link(nd); + if (!IS_ERR(buf)) /* free the char* */ + kfree(buf); +} +#endif + +static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags) +{ + int err; + + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + /* + * Permission check on sdcardfs inode. + * Calling process should have AID_SDCARD_RW permission + */ + err = generic_permission(inode, mask, 0, inode->i_op->check_acl); + + /* XXX + * Original sdcardfs code calls inode_permission(lower_inode,.. ) + * for checking inode permission. But doing such things here seems + * duplicated work, because the functions called after this func, + * such as vfs_create, vfs_unlink, vfs_rename, and etc, + * does exactly same thing, i.e., they calls inode_permission(). + * So we just let they do the things. + * If there are any security hole, just uncomment following if block. + */ +#if 0 + if (!err) { + /* + * Permission check on lower_inode(=EXT4). + * we check it with AID_MEDIA_RW permission + */ + struct inode *lower_inode; + OVERRIDE_CRED(SDCARDFS_SB(inode->sb)); + + lower_inode = sdcardfs_lower_inode(inode); + err = inode_permission(lower_inode, mask); + + REVERT_CRED(); + } +#endif + return err; + +} + +static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + struct path lower_path; + struct dentry *parent; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + parent = dget_parent(dentry); + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + sbi->options.derive, 0, 0)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + dput(parent); + return -EACCES; + } + dput(parent); + + inode = dentry->d_inode; + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_inode = sdcardfs_lower_inode(inode); + + fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + /* if the dentry has been moved from other location + * so, on this stage, its derived permission must be + * rechecked from its private field. + */ + fix_derived_permission(inode); + + generic_fillattr(inode, stat); + sdcardfs_put_lower_path(dentry, &lower_path); + return 0; +} + +static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) +{ + int err = 0; + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + struct path lower_path; + struct iattr lower_ia; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + struct dentry *parent; + int has_rw; + + inode = dentry->d_inode; + + /* + * Check if user has permission to change inode. We don't check if + * this user can change the lower inode: that should happen when + * calling notify_change on the lower inode. + */ + err = inode_change_ok(inode, ia); + + /* no vfs_XXX operations required, cred overriding will be skipped. wj*/ + if (!err) { + /* check the Android group ID */ + has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); + parent = dget_parent(dentry); + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + sbi->options.derive, 1, has_rw)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + err = -EACCES; + } + dput(parent); + } + + if (err) + goto out_err; + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_inode = sdcardfs_lower_inode(inode); + + /* prepare our own lower struct iattr (with the lower file) */ + memcpy(&lower_ia, ia, sizeof(lower_ia)); + if (ia->ia_valid & ATTR_FILE) + lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file); + + lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + + /* + * If shrinking, first truncate upper level to cancel writing dirty + * pages beyond the new eof; and also if its' maxbytes is more + * limiting (fail with -EFBIG before making any change to the lower + * level). There is no need to vmtruncate the upper level + * afterwards in the other cases: we fsstack_copy_inode_size from + * the lower level. + */ + if (current->mm) + down_write(¤t->mm->mmap_sem); + if (ia->ia_valid & ATTR_SIZE) { + err = inode_newsize_ok(inode, ia->ia_size); + if (err) { + if (current->mm) + up_write(¤t->mm->mmap_sem); + goto out; + } + truncate_setsize(inode, ia->ia_size); + } + + /* + * mode change is for clearing setuid/setgid bits. Allow lower fs + * to interpret this in its own way. + */ + if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) + lower_ia.ia_valid &= ~ATTR_MODE; + + /* notify the (possibly copied-up) lower inode */ + /* + * Note: we use lower_dentry->d_inode, because lower_inode may be + * unlinked (no inode->i_sb and i_ino==0. This happens if someone + * tries to open(), unlink(), then ftruncate() a file. + */ + mutex_lock(&lower_dentry->d_inode->i_mutex); + err = notify_change(lower_dentry, &lower_ia); /* note: lower_ia */ + mutex_unlock(&lower_dentry->d_inode->i_mutex); + if (current->mm) + up_write(¤t->mm->mmap_sem); + if (err) + goto out; + + /* get attributes from the lower inode */ + fsstack_copy_attr_all(inode, lower_inode); + /* update derived permission of the upper inode */ + fix_derived_permission(inode); + + /* + * Not running fsstack_copy_inode_size(inode, lower_inode), because + * VFS should update our inode size, and notify_change on + * lower_inode should update its size. + */ + +out: + sdcardfs_put_lower_path(dentry, &lower_path); +out_err: + return err; +} + +const struct inode_operations sdcardfs_symlink_iops = { + .permission = sdcardfs_permission, + .setattr = sdcardfs_setattr, + /* XXX Following operations are implemented, + * but FUSE(sdcard) or FAT does not support them + * These methods are *NOT* perfectly tested. + .readlink = sdcardfs_readlink, + .follow_link = sdcardfs_follow_link, + .put_link = sdcardfs_put_link, + */ +}; + +const struct inode_operations sdcardfs_dir_iops = { + .create = sdcardfs_create, + .lookup = sdcardfs_lookup, + .permission = sdcardfs_permission, + .unlink = sdcardfs_unlink, + .mkdir = sdcardfs_mkdir, + .rmdir = sdcardfs_rmdir, + .rename = sdcardfs_rename, + .setattr = sdcardfs_setattr, + .getattr = sdcardfs_getattr, + /* XXX Following operations are implemented, + * but FUSE(sdcard) or FAT does not support them + * These methods are *NOT* perfectly tested. + .symlink = sdcardfs_symlink, + .link = sdcardfs_link, + .mknod = sdcardfs_mknod, + */ +}; + +const struct inode_operations sdcardfs_main_iops = { + .permission = sdcardfs_permission, + .setattr = sdcardfs_setattr, + .getattr = sdcardfs_getattr, +}; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c new file mode 100644 index 000000000000..c0b12375b1bf --- /dev/null +++ b/fs/sdcardfs/lookup.c @@ -0,0 +1,386 @@ +/* + * fs/sdcardfs/lookup.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include "linux/delay.h" + +/* The dentry cache is just so we have properly sized dentries */ +static struct kmem_cache *sdcardfs_dentry_cachep; + +int sdcardfs_init_dentry_cache(void) +{ + sdcardfs_dentry_cachep = + kmem_cache_create("sdcardfs_dentry", + sizeof(struct sdcardfs_dentry_info), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + + return sdcardfs_dentry_cachep ? 0 : -ENOMEM; +} + +void sdcardfs_destroy_dentry_cache(void) +{ + if (sdcardfs_dentry_cachep) + kmem_cache_destroy(sdcardfs_dentry_cachep); +} + +void free_dentry_private_data(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + kmem_cache_free(sdcardfs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +/* allocate new dentry private data */ +int new_dentry_private_data(struct dentry *dentry) +{ + struct sdcardfs_dentry_info *info = SDCARDFS_D(dentry); + + /* use zalloc to init dentry_info.lower_path */ + info = kmem_cache_zalloc(sdcardfs_dentry_cachep, GFP_ATOMIC); + if (!info) + return -ENOMEM; + + spin_lock_init(&info->lock); + dentry->d_fsdata = info; + + return 0; +} + +static int sdcardfs_inode_test(struct inode *inode, void *candidate_lower_inode) +{ + struct inode *current_lower_inode = sdcardfs_lower_inode(inode); + if (current_lower_inode == (struct inode *)candidate_lower_inode) + return 1; /* found a match */ + else + return 0; /* no match */ +} + +static int sdcardfs_inode_set(struct inode *inode, void *lower_inode) +{ + /* we do actual inode initialization in sdcardfs_iget */ + return 0; +} + +static struct inode *sdcardfs_iget(struct super_block *sb, + struct inode *lower_inode) +{ + struct sdcardfs_inode_info *info; + struct inode *inode; /* the new inode to return */ + int err; + + inode = iget5_locked(sb, /* our superblock */ + /* + * hashval: we use inode number, but we can + * also use "(unsigned long)lower_inode" + * instead. + */ + lower_inode->i_ino, /* hashval */ + sdcardfs_inode_test, /* inode comparison function */ + sdcardfs_inode_set, /* inode init function */ + lower_inode); /* data passed to test+set fxns */ + if (!inode) { + err = -EACCES; + iput(lower_inode); + return ERR_PTR(err); + } + /* if found a cached inode, then just return it */ + if (!(inode->i_state & I_NEW)) + return inode; + + /* initialize new inode */ + info = SDCARDFS_I(inode); + + inode->i_ino = lower_inode->i_ino; + if (!igrab(lower_inode)) { + err = -ESTALE; + return ERR_PTR(err); + } + sdcardfs_set_lower_inode(inode, lower_inode); + + inode->i_version++; + + /* use different set of inode ops for symlinks & directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_op = &sdcardfs_dir_iops; + else if (S_ISLNK(lower_inode->i_mode)) + inode->i_op = &sdcardfs_symlink_iops; + else + inode->i_op = &sdcardfs_main_iops; + + /* use different set of file ops for directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_fop = &sdcardfs_dir_fops; + else + inode->i_fop = &sdcardfs_main_fops; + + inode->i_mapping->a_ops = &sdcardfs_aops; + + inode->i_atime.tv_sec = 0; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_sec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_sec = 0; + inode->i_ctime.tv_nsec = 0; + + /* properly initialize special inodes */ + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) + init_special_inode(inode, lower_inode->i_mode, + lower_inode->i_rdev); + + /* all well, copy inode attributes */ + fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + + fix_derived_permission(inode); + + unlock_new_inode(inode); + return inode; +} + +/* + * Connect a sdcardfs inode dentry/inode with several lower ones. This is + * the classic stackable file system "vnode interposition" action. + * + * @dentry: sdcardfs's dentry which interposes on lower one + * @sb: sdcardfs's super_block + * @lower_path: the lower path (caller does path_get/put) + */ +int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path) +{ + int err = 0; + struct inode *inode; + struct inode *lower_inode; + struct super_block *lower_sb; + + lower_inode = lower_path->dentry->d_inode; + lower_sb = sdcardfs_lower_super(sb); + + /* check that the lower file system didn't cross a mount point */ + if (lower_inode->i_sb != lower_sb) { + err = -EXDEV; + goto out; + } + + /* + * We allocate our new inode below by calling sdcardfs_iget, + * which will initialize some of the new inode's fields + */ + + /* inherit lower inode number for sdcardfs's inode */ + inode = sdcardfs_iget(sb, lower_inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + update_derived_permission(dentry); +out: + return err; +} + +/* + * Main driver function for sdcardfs's lookup. + * + * Returns: NULL (ok), ERR_PTR if an error occurred. + * Fills in lower_parent_path with on success. + */ +static struct dentry *__sdcardfs_lookup(struct dentry *dentry, + struct nameidata *nd, struct path *lower_parent_path) +{ + int err = 0; + struct vfsmount *lower_dir_mnt; + struct dentry *lower_dir_dentry = NULL; + struct dentry *lower_dentry; + const char *name; + struct nameidata lower_nd; + struct path lower_path; + struct qstr this; + struct sdcardfs_sb_info *sbi; + + sbi = SDCARDFS_SB(dentry->d_sb); + /* must initialize dentry operations */ + d_set_d_op(dentry, &sdcardfs_ci_dops); + + if (IS_ROOT(dentry)) + goto out; + + name = dentry->d_name.name; + + /* now start the actual lookup procedure */ + lower_dir_dentry = lower_parent_path->dentry; + lower_dir_mnt = lower_parent_path->mnt; + + /* Use vfs_path_lookup to check if the dentry exists or not */ + if (sbi->options.lower_fs == LOWER_FS_EXT4) { + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, + LOOKUP_CASE_INSENSITIVE, &lower_nd); + } else if (sbi->options.lower_fs == LOWER_FS_FAT) { + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, + &lower_nd); + } + + /* no error: handle positive dentries */ + if (!err) { + /* check if the dentry is an obb dentry + * if true, the lower_inode must be replaced with + * the inode of the graft path */ + + if(need_graft_path(dentry)) { + + /* setup_obb_dentry() + * The lower_path will be stored to the dentry's orig_path + * and the base obbpath will be copyed to the lower_path variable. + * if an error returned, there's no change in the lower_path + * returns: -ERRNO if error (0: no error) */ + err = setup_obb_dentry(dentry, &lower_nd.path); + + if(err) { + /* if the sbi->obbpath is not available, we can optionally + * setup the lower_path with its orig_path. + * but, the current implementation just returns an error + * because the sdcard daemon also regards this case as + * a lookup fail. */ + printk(KERN_INFO "sdcardfs: base obbpath is not available\n"); + sdcardfs_put_reset_orig_path(dentry); + goto out; + } + } + + sdcardfs_set_lower_path(dentry, &lower_nd.path); + err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_nd.path); + if (err) /* path_put underlying path on error */ + sdcardfs_put_reset_lower_path(dentry); + goto out; + } + + /* + * We don't consider ENOENT an error, and we want to return a + * negative dentry. + */ + if (err && err != -ENOENT) + goto out; + + /* instatiate a new negative dentry */ + this.name = name; + this.len = strlen(name); + this.hash = full_name_hash(this.name, this.len); + lower_dentry = d_lookup(lower_dir_dentry, &this); + if (lower_dentry) + goto setup_lower; + + lower_dentry = d_alloc(lower_dir_dentry, &this); + if (!lower_dentry) { + err = -ENOMEM; + goto out; + } + d_add(lower_dentry, NULL); /* instantiate and hash */ + +setup_lower: + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_dir_mnt); + sdcardfs_set_lower_path(dentry, &lower_path); + + /* + * If the intent is to create a file, then don't return an error, so + * the VFS will continue the process of making this negative dentry + * into a positive one. + */ + if (nd) { + if (nd->flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) + err = 0; + } else + err = 0; + +out: + return ERR_PTR(err); +} + +/* + * On success: + * fills dentry object appropriate values and returns NULL. + * On fail (== error) + * returns error ptr + * + * @dir : Parent inode. It is locked (dir->i_mutex) + * @dentry : Target dentry to lookup. we should set each of fields. + * (dentry->d_name is initialized already) + * @nd : nameidata of parent inode + */ +struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct dentry *ret = NULL, *parent; + struct path lower_parent_path; + int err = 0; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + const struct cred *saved_cred = NULL; + + parent = dget_parent(dentry); + + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + sbi->options.derive, 0, 0)) { + ret = ERR_PTR(-EACCES); + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + goto out_err; + } + + /* save current_cred and override it */ + OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred); + + sdcardfs_get_lower_path(parent, &lower_parent_path); + + /* allocate dentry private data. We free it in ->d_release */ + err = new_dentry_private_data(dentry); + if (err) { + ret = ERR_PTR(err); + goto out; + } + + ret = __sdcardfs_lookup(dentry, nd, &lower_parent_path); + if (IS_ERR(ret)) + { + goto out; + } + if (ret) + dentry = ret; + if (dentry->d_inode) { + fsstack_copy_attr_times(dentry->d_inode, + sdcardfs_lower_inode(dentry->d_inode)); + /* get drived permission */ + get_derived_permission(parent, dentry); + fix_derived_permission(dentry->d_inode); + } + /* update parent directory's atime */ + fsstack_copy_attr_atime(parent->d_inode, + sdcardfs_lower_inode(parent->d_inode)); + +out: + sdcardfs_put_lower_path(parent, &lower_parent_path); + REVERT_CRED(saved_cred); +out_err: + dput(parent); + return ret; +} diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c new file mode 100644 index 000000000000..1fdceffec72c --- /dev/null +++ b/fs/sdcardfs/main.c @@ -0,0 +1,425 @@ +/* + * fs/sdcardfs/main.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include +#include +#include + +enum { + Opt_uid, + Opt_gid, + Opt_wgid, + Opt_debug, + Opt_split, + Opt_derive, + Opt_lower_fs, + Opt_reserved_mb, + Opt_err, +}; + +static const match_table_t sdcardfs_tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_wgid, "wgid=%u"}, + {Opt_debug, "debug"}, + {Opt_split, "split"}, + {Opt_derive, "derive=%s"}, + {Opt_lower_fs, "lower_fs=%s"}, + {Opt_reserved_mb, "reserved_mb=%u"}, + {Opt_err, NULL} +}; + +static int parse_options(struct super_block *sb, char *options, int silent, + int *debug, struct sdcardfs_mount_options *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char *string_option; + + /* by default, we use AID_MEDIA_RW as uid, gid */ + opts->fs_low_uid = AID_MEDIA_RW; + opts->fs_low_gid = AID_MEDIA_RW; + /* by default, we use AID_SDCARD_RW as write_gid */ + opts->write_gid = AID_SDCARD_RW; + /* default permission policy + * (DERIVE_NONE | DERIVE_LEGACY | DERIVE_UNIFIED) */ + opts->derive = DERIVE_NONE; + opts->split_perms = 0; + /* by default, we use LOWER_FS_EXT4 as lower fs type */ + opts->lower_fs = LOWER_FS_EXT4; + /* by default, 0MB is reserved */ + opts->reserved_mb = 0; + + *debug = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, sdcardfs_tokens, args); + + switch (token) { + case Opt_debug: + *debug = 1; + break; + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_low_uid = option; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_low_gid = option; + break; + case Opt_wgid: + if (match_int(&args[0], &option)) + return 0; + opts->write_gid = option; + break; + case Opt_split: + opts->split_perms=1; + break; + case Opt_derive: + string_option = match_strdup(&args[0]); + if (!strcmp("none", string_option)) { + opts->derive = DERIVE_NONE; + } else if (!strcmp("legacy", string_option)) { + opts->derive = DERIVE_LEGACY; + } else if (!strcmp("unified", string_option)) { + opts->derive = DERIVE_UNIFIED; + } else { + kfree(string_option); + goto invalid_option; + } + kfree(string_option); + break; + case Opt_lower_fs: + string_option = match_strdup(&args[0]); + if (!strcmp("ext4", string_option)) { + opts->lower_fs = LOWER_FS_EXT4; + } else if (!strcmp("fat", string_option)) { + opts->lower_fs = LOWER_FS_FAT; + } else { + kfree(string_option); + goto invalid_option; + } + kfree(string_option); + break; + case Opt_reserved_mb: + if (match_int(&args[0], &option)) + return 0; + opts->reserved_mb = option; + break; + /* unknown option */ + default: +invalid_option: + if (!silent) { + printk( KERN_ERR "Unrecognized mount option \"%s\" " + "or missing value", p); + } + return -EINVAL; + } + } + + if (*debug) { + printk( KERN_INFO "sdcardfs : options - debug:%d\n", *debug); + printk( KERN_INFO "sdcardfs : options - uid:%d\n", + opts->fs_low_uid); + printk( KERN_INFO "sdcardfs : options - gid:%d\n", + opts->fs_low_gid); + } + + return 0; +} + +/* + * our custom d_alloc_root work-alike + * + * we can't use d_alloc_root if we want to use our own interpose function + * unchanged, so we simply call our own "fake" d_alloc_root + */ +static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) +{ + struct dentry *ret = NULL; + + if (sb) { + static const struct qstr name = { + .name = "/", + .len = 1 + }; + + ret = d_alloc(NULL, &name); + if (ret) { + d_set_d_op(ret, &sdcardfs_ci_dops); + ret->d_sb = sb; + ret->d_parent = ret; + } + } + return ret; +} + +/* + * There is no need to lock the sdcardfs_super_info's rwsem as there is no + * way anyone can have a reference to the superblock at this point in time. + */ +static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, + void *raw_data, int silent) +{ + int err = 0; + int debug; + struct super_block *lower_sb; + struct path lower_path; + struct sdcardfs_sb_info *sb_info; + void *pkgl_id; + + printk(KERN_INFO "sdcardfs version 2.0\n"); + + if (!dev_name) { + printk(KERN_ERR + "sdcardfs: read_super: missing dev_name argument\n"); + err = -EINVAL; + goto out; + } + + printk(KERN_INFO "sdcardfs: dev_name -> %s\n", dev_name); + printk(KERN_INFO "sdcardfs: options -> %s\n", (char *)raw_data); + + /* parse lower path */ + err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &lower_path); + if (err) { + printk(KERN_ERR "sdcardfs: error accessing " + "lower directory '%s'\n", dev_name); + goto out; + } + + /* allocate superblock private data */ + sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL); + if (!SDCARDFS_SB(sb)) { + printk(KERN_CRIT "sdcardfs: read_super: out of memory\n"); + err = -ENOMEM; + goto out_free; + } + + sb_info = sb->s_fs_info; + + /* parse options */ + err = parse_options(sb, raw_data, silent, &debug, &sb_info->options); + if (err) { + printk(KERN_ERR "sdcardfs: invalid options\n"); + goto out_freesbi; + } + + if (sb_info->options.derive != DERIVE_NONE) { + pkgl_id = packagelist_create(sb_info->options.write_gid); + if(IS_ERR(pkgl_id)) + goto out_freesbi; + else + sb_info->pkgl_id = pkgl_id; + } + + /* set the lower superblock field of upper superblock */ + lower_sb = lower_path.dentry->d_sb; + atomic_inc(&lower_sb->s_active); + sdcardfs_set_lower_super(sb, lower_sb); + + /* inherit maxbytes from lower file system */ + sb->s_maxbytes = lower_sb->s_maxbytes; + + /* + * Our c/m/atime granularity is 1 ns because we may stack on file + * systems whose granularity is as good. + */ + sb->s_time_gran = 1; + + sb->s_magic = SDCARDFS_SUPER_MAGIC; + sb->s_op = &sdcardfs_sops; + + /* see comment next to the definition of sdcardfs_d_alloc_root */ + sb->s_root = sdcardfs_d_alloc_root(sb); + if (!sb->s_root) { + err = -ENOMEM; + goto out_sput; + } + + /* link the upper and lower dentries */ + sb->s_root->d_fsdata = NULL; + err = new_dentry_private_data(sb->s_root); + if (err) + goto out_freeroot; + + /* set the lower dentries for s_root */ + sdcardfs_set_lower_path(sb->s_root, &lower_path); + + /* call interpose to create the upper level inode */ + err = sdcardfs_interpose(sb->s_root, sb, &lower_path); + if (!err) { + /* setup permission policy */ + switch(sb_info->options.derive) { + case DERIVE_NONE: + setup_derived_state(sb->s_root->d_inode, + PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775); + sb_info->obbpath_s = NULL; + break; + case DERIVE_LEGACY: + /* Legacy behavior used to support internal multiuser layout which + * places user_id at the top directory level, with the actual roots + * just below that. Shared OBB path is also at top level. */ + setup_derived_state(sb->s_root->d_inode, + PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); + /* initialize the obbpath string and lookup the path + * sb_info->obb_path will be deactivated by path_put + * on sdcardfs_put_super */ + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); + err = prepare_dir(sb_info->obbpath_s, + sb_info->options.fs_low_uid, + sb_info->options.fs_low_gid, 00755); + if(err) + printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n", + __func__,__LINE__, sb_info->obbpath_s); + break; + case DERIVE_UNIFIED: + /* Unified multiuser layout which places secondary user_id under + * /Android/user and shared OBB path under /Android/obb. */ + setup_derived_state(sb->s_root->d_inode, + PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); + + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); + break; + } + fix_derived_permission(sb->s_root->d_inode); + + if (!silent) + printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", + dev_name, lower_sb->s_type->name); + goto out; + } + /* else error: fall through */ + + free_dentry_private_data(sb->s_root); +out_freeroot: + dput(sb->s_root); +out_sput: + /* drop refs we took earlier */ + atomic_dec(&lower_sb->s_active); + packagelist_destroy(sb_info->pkgl_id); +out_freesbi: + kfree(SDCARDFS_SB(sb)); + sb->s_fs_info = NULL; +out_free: + path_put(&lower_path); + +out: + return err; +} + +/* A feature which supports mount_nodev() with options */ +static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, const char *, void *, int)) + +{ + int error; + struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + + if (IS_ERR(s)) + return ERR_CAST(s); + + s->s_flags = flags; + + error = fill_super(s, dev_name, data, flags & MS_SILENT ? 1 : 0); + if (error) { + deactivate_locked_super(s); + return ERR_PTR(error); + } + s->s_flags |= MS_ACTIVE; + return dget(s->s_root); +} + +struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + /* + * dev_name is a lower_path_name, + * raw_data is a option string. + */ + return mount_nodev_with_options(fs_type, flags, dev_name, + raw_data, sdcardfs_read_super); +} + +static struct file_system_type sdcardfs_fs_type = { + .owner = THIS_MODULE, + .name = SDCARDFS_NAME, + .mount = sdcardfs_mount, + .kill_sb = generic_shutdown_super, + .fs_flags = FS_REVAL_DOT, +}; + +static int __init init_sdcardfs_fs(void) +{ + int err; + + pr_info("Registering sdcardfs " SDCARDFS_VERSION "\n"); + + err = sdcardfs_init_inode_cache(); + if (err) + goto out; + err = sdcardfs_init_dentry_cache(); + if (err) + goto out; + err = packagelist_init(); + if (err) + goto out; + err = register_filesystem(&sdcardfs_fs_type); +out: + if (err) { + sdcardfs_destroy_inode_cache(); + sdcardfs_destroy_dentry_cache(); + packagelist_exit(); + } + return err; +} + +static void __exit exit_sdcardfs_fs(void) +{ + sdcardfs_destroy_inode_cache(); + sdcardfs_destroy_dentry_cache(); + packagelist_exit(); + unregister_filesystem(&sdcardfs_fs_type); + pr_info("Completed sdcardfs module unload\n"); +} + +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" + " (http://www.fsl.cs.sunysb.edu/)"); +MODULE_DESCRIPTION("Wrapfs " SDCARDFS_VERSION + " (http://wrapfs.filesystems.org/)"); +MODULE_LICENSE("GPL"); + +module_init(init_sdcardfs_fs); +module_exit(exit_sdcardfs_fs); diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c new file mode 100644 index 000000000000..c807d7f18f8b --- /dev/null +++ b/fs/sdcardfs/mmap.c @@ -0,0 +1,82 @@ +/* + * fs/sdcardfs/mmap.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + int err; + struct file *file, *lower_file; + const struct vm_operations_struct *lower_vm_ops; + struct vm_area_struct lower_vma; + + memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); + file = lower_vma.vm_file; + lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; + BUG_ON(!lower_vm_ops); + + lower_file = sdcardfs_lower_file(file); + /* + * XXX: vm_ops->fault may be called in parallel. Because we have to + * resort to temporarily changing the vma->vm_file to point to the + * lower file, a concurrent invocation of sdcardfs_fault could see a + * different value. In this workaround, we keep a different copy of + * the vma structure in our stack, so we never expose a different + * value of the vma->vm_file called to us, even temporarily. A + * better fix would be to change the calling semantics of ->fault to + * take an explicit file pointer. + */ + lower_vma.vm_file = lower_file; + err = lower_vm_ops->fault(&lower_vma, vmf); + return err; +} + +static ssize_t sdcardfs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + /* + * This function returns zero on purpose in order to support direct IO. + * __dentry_open checks a_ops->direct_IO and returns EINVAL if it is null. + * + * However, this function won't be called by certain file operations + * including generic fs functions. * reads and writes are delivered to + * the lower file systems and the direct IOs will be handled by them. + * + * NOTE: exceptionally, on the recent kernels (since Linux 3.8.x), + * swap_writepage invokes this function directly. + */ + printk(KERN_INFO "%s, operation is not supported\n", __func__); + return 0; +} + +/* + * XXX: the default address_space_ops for sdcardfs is empty. We cannot set + * our inode->i_mapping->a_ops to NULL because too many code paths expect + * the a_ops vector to be non-NULL. + */ +const struct address_space_operations sdcardfs_aops = { + /* empty on purpose */ + .direct_IO = sdcardfs_direct_IO, +}; + +const struct vm_operations_struct sdcardfs_vm_ops = { + .fault = sdcardfs_fault, +}; diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h new file mode 100644 index 000000000000..923ba101dfa9 --- /dev/null +++ b/fs/sdcardfs/multiuser.h @@ -0,0 +1,37 @@ +/* + * fs/sdcardfs/multiuser.h + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#define MULTIUSER_APP_PER_USER_RANGE 100000 + +typedef uid_t userid_t; +typedef uid_t appid_t; + +static inline userid_t multiuser_get_user_id(uid_t uid) { + return uid / MULTIUSER_APP_PER_USER_RANGE; +} + +static inline appid_t multiuser_get_app_id(uid_t uid) { + return uid % MULTIUSER_APP_PER_USER_RANGE; +} + +static inline uid_t multiuser_get_uid(userid_t userId, appid_t appId) { + return userId * MULTIUSER_APP_PER_USER_RANGE + (appId % MULTIUSER_APP_PER_USER_RANGE); +} + diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c new file mode 100644 index 000000000000..c786d8f92203 --- /dev/null +++ b/fs/sdcardfs/packagelist.c @@ -0,0 +1,458 @@ +/* + * fs/sdcardfs/packagelist.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" +#include "strtok.h" +#include "hashtable.h" +#include +#include +#include +#include + +#define STRING_BUF_SIZE (512) + +struct hashtable_entry { + struct hlist_node hlist; + void *key; + int value; +}; + +struct packagelist_data { + DECLARE_HASHTABLE(package_to_appid,8); + DECLARE_HASHTABLE(appid_with_rw,7); + struct mutex hashtable_lock; + struct task_struct *thread_id; + gid_t write_gid; + char *strtok_last; + char read_buf[STRING_BUF_SIZE]; + char event_buf[STRING_BUF_SIZE]; + char app_name_buf[STRING_BUF_SIZE]; + char gids_buf[STRING_BUF_SIZE]; +}; + +static struct kmem_cache *hashtable_entry_cachep; + +/* Path to system-provided mapping of package name to appIds */ +static const char* const kpackageslist_file = "/data/system/packages.list"; +/* Supplementary groups to execute with */ +static const gid_t kgroups[1] = { AID_PACKAGE_INFO }; + +static unsigned int str_hash(void *key) { + int i; + unsigned int h = strlen(key); + char *data = (char *)key; + + for (i = 0; i < strlen(key); i++) { + h = h * 31 + *data; + data++; + } + return h; +} + +static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) { + struct hashtable_entry *hash_cur; + struct hlist_node *h_n; + + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid, h_n) + if (appid == hash_cur->key) + return 1; + return 0; +} + +/* Return if the calling UID holds sdcard_rw. */ +int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) { + struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; + appid_t appid; + int ret; + + /* No additional permissions enforcement */ + if (derive == DERIVE_NONE) { + return 1; + } + + appid = multiuser_get_app_id(current_fsuid()); + mutex_lock(&pkgl_dat->hashtable_lock); + ret = contain_appid_key(pkgl_dat, (void *)appid); + mutex_unlock(&pkgl_dat->hashtable_lock); + return ret; +} + +appid_t get_appid(void *pkgl_id, const char *app_name) +{ + struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; + struct hashtable_entry *hash_cur; + struct hlist_node *h_n; + unsigned int hash = str_hash((void *)app_name); + appid_t ret_id; + + //printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash); + mutex_lock(&pkgl_dat->hashtable_lock); + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) { + //printk(KERN_INFO "sdcardfs: %s: %s\n", __func__, (char *)hash_cur->key); + if (!strcasecmp(app_name, hash_cur->key)) { + ret_id = (appid_t)hash_cur->value; + mutex_unlock(&pkgl_dat->hashtable_lock); + //printk(KERN_INFO "=> app_id: %d\n", (int)ret_id); + return ret_id; + } + } + mutex_unlock(&pkgl_dat->hashtable_lock); + //printk(KERN_INFO "=> app_id: %d\n", 0); + return 0; +} + +/* Kernel has already enforced everything we returned through + * derive_permissions_locked(), so this is used to lock down access + * even further, such as enforcing that apps hold sdcard_rw. */ +int check_caller_access_to_name(struct inode *parent_node, const char* name, + derive_t derive, int w_ok, int has_rw) { + + /* Always block security-sensitive files at root */ + if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) { + if (!strcasecmp(name, "autorun.inf") + || !strcasecmp(name, ".android_secure") + || !strcasecmp(name, "android_secure")) { + return 0; + } + } + + /* No additional permissions enforcement */ + if (derive == DERIVE_NONE) { + return 1; + } + + /* Root always has access; access for any other UIDs should always + * be controlled through packages.list. */ + if (current_fsuid() == 0) { + return 1; + } + + /* If asking to write, verify that caller either owns the + * parent or holds sdcard_rw. */ + if (w_ok) { + if (parent_node && + (current_fsuid() == SDCARDFS_I(parent_node)->d_uid)) { + return 1; + } + return has_rw; + } + + /* No extra permissions to enforce */ + return 1; +} + +/* This function is used when file opening. The open flags must be + * checked before calling check_caller_access_to_name() */ +int open_flags_to_access_mode(int open_flags) { + if((open_flags & O_ACCMODE) == O_RDONLY) { + return 0; /* R_OK */ + } else if ((open_flags & O_ACCMODE) == O_WRONLY) { + return 1; /* W_OK */ + } else { + /* Probably O_RDRW, but treat as default to be safe */ + return 1; /* R_OK | W_OK */ + } +} + +static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) { + struct hashtable_entry *hash_cur; + struct hashtable_entry *new_entry; + struct hlist_node *h_n; + unsigned int hash = str_hash(key); + + //printk(KERN_INFO "sdcardfs: %s: %s: %d, %u\n", __func__, (char *)key, value, hash); + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) { + if (!strcasecmp(key, hash_cur->key)) { + hash_cur->value = value; + return 0; + } + } + new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->key = kstrdup(key, GFP_KERNEL); + new_entry->value = value; + hash_add(pkgl_dat->package_to_appid, &new_entry->hlist, hash); + return 0; +} + +static void remove_str_to_int(struct hashtable_entry *h_entry) { + //printk(KERN_INFO "sdcardfs: %s: %s: %d\n", __func__, (char *)h_entry->key, h_entry->value); + kfree(h_entry->key); + kmem_cache_free(hashtable_entry_cachep, h_entry); +} + +static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) { + struct hashtable_entry *hash_cur; + struct hashtable_entry *new_entry; + struct hlist_node *h_n; + + //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value); + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, + (unsigned int)key, h_n) { + if (key == hash_cur->key) { + hash_cur->value = value; + return 0; + } + } + new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->key = key; + new_entry->value = value; + hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, + (unsigned int)new_entry->key); + return 0; +} + +static void remove_int_to_null(struct hashtable_entry *h_entry) { + //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)h_entry->key, h_entry->value); + kmem_cache_free(hashtable_entry_cachep, h_entry); +} + +static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) +{ + struct hashtable_entry *hash_cur; + struct hlist_node *h_n; + struct hlist_node *h_t; + int i; + + hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist, h_n) + remove_str_to_int(hash_cur); + hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist, h_n) + remove_int_to_null(hash_cur); + + hash_init(pkgl_dat->package_to_appid); + hash_init(pkgl_dat->appid_with_rw); +} + +static int read_package_list(struct packagelist_data *pkgl_dat) { + int ret; + int fd; + int read_amount; + + printk(KERN_INFO "sdcardfs: read_package_list\n"); + + mutex_lock(&pkgl_dat->hashtable_lock); + + remove_all_hashentrys(pkgl_dat); + + fd = sys_open(kpackageslist_file, O_RDONLY, 0); + if (fd < 0) { + printk(KERN_ERR "sdcardfs: failed to open package list\n"); + mutex_unlock(&pkgl_dat->hashtable_lock); + return fd; + } + + while ((read_amount = sys_read(fd, pkgl_dat->read_buf, + sizeof(pkgl_dat->read_buf))) > 0) { + int appid; + char *token; + int one_line_len = 0; + int additional_read; + unsigned long ret_gid; + + while (one_line_len < read_amount) { + if (pkgl_dat->read_buf[one_line_len] == '\n') { + one_line_len++; + break; + } + one_line_len++; + } + additional_read = read_amount - one_line_len; + if (additional_read > 0) + sys_lseek(fd, -additional_read, SEEK_CUR); + + if (sscanf(pkgl_dat->read_buf, "%s %d %*d %*s %*s %s", + pkgl_dat->app_name_buf, &appid, + pkgl_dat->gids_buf) == 3) { + ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid); + if (ret) { + sys_close(fd); + mutex_unlock(&pkgl_dat->hashtable_lock); + return ret; + } + + token = strtok_r(pkgl_dat->gids_buf, ",", &pkgl_dat->strtok_last); + while (token != NULL) { + if (!kstrtoul(token, 10, &ret_gid) && + (ret_gid == pkgl_dat->write_gid)) { + ret = insert_int_to_null(pkgl_dat, (void *)appid, 1); + if (ret) { + sys_close(fd); + mutex_unlock(&pkgl_dat->hashtable_lock); + return ret; + } + break; + } + token = strtok_r(NULL, ",", &pkgl_dat->strtok_last); + } + } + } + + sys_close(fd); + mutex_unlock(&pkgl_dat->hashtable_lock); + return 0; +} + +static int packagelist_reader(void *thread_data) +{ + struct packagelist_data *pkgl_dat = (struct packagelist_data *)thread_data; + struct inotify_event *event; + bool active = false; + int event_pos; + int event_size; + int res = 0; + int nfd; + + allow_signal(SIGINT); + + nfd = sys_inotify_init(); + if (nfd < 0) { + printk(KERN_ERR "sdcardfs: inotify_init failed: %d\n", nfd); + return nfd; + } + + while (!kthread_should_stop()) { + if (signal_pending(current)) { + ssleep(1); + continue; + } + + if (!active) { + res = sys_inotify_add_watch(nfd, kpackageslist_file, IN_DELETE_SELF); + if (res < 0) { + if (res == -ENOENT || res == -EACCES) { + /* Framework may not have created yet, sleep and retry */ + printk(KERN_ERR "sdcardfs: missing packages.list; retrying\n"); + ssleep(2); + printk(KERN_ERR "sdcardfs: missing packages.list_end; retrying\n"); + continue; + } else { + printk(KERN_ERR "sdcardfs: inotify_add_watch failed: %d\n", res); + goto interruptable_sleep; + } + } + /* Watch above will tell us about any future changes, so + * read the current state. */ + res = read_package_list(pkgl_dat); + if (res) { + printk(KERN_ERR "sdcardfs: read_package_list failed: %d\n", res); + goto interruptable_sleep; + } + active = true; + } + + event_pos = 0; + res = sys_read(nfd, pkgl_dat->event_buf, sizeof(pkgl_dat->event_buf)); + if (res < (int) sizeof(*event)) { + if (res == -EINTR) + continue; + printk(KERN_ERR "sdcardfs: failed to read inotify event: %d\n", res); + goto interruptable_sleep; + } + + while (res >= (int) sizeof(*event)) { + event = (struct inotify_event *) (pkgl_dat->event_buf + event_pos); + + printk(KERN_INFO "sdcardfs: inotify event: %08x\n", event->mask); + if ((event->mask & IN_IGNORED) == IN_IGNORED) { + /* Previously watched file was deleted, probably due to move + * that swapped in new data; re-arm the watch and read. */ + active = false; + } + + event_size = sizeof(*event) + event->len; + res -= event_size; + event_pos += event_size; + } + continue; + +interruptable_sleep: + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + flush_signals(current); + sys_close(nfd); + return res; +} + +void * packagelist_create(gid_t write_gid) +{ + struct packagelist_data *pkgl_dat; + struct task_struct *packagelist_thread; + + pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO); + if (!pkgl_dat) { + printk(KERN_ERR "sdcardfs: creating kthread failed\n"); + return ERR_PTR(-ENOMEM); + } + + mutex_init(&pkgl_dat->hashtable_lock); + hash_init(pkgl_dat->package_to_appid); + hash_init(pkgl_dat->appid_with_rw); + pkgl_dat->write_gid = write_gid; + + packagelist_thread = kthread_run(packagelist_reader, (void *)pkgl_dat, "pkgld"); + if (IS_ERR(packagelist_thread)) { + printk(KERN_ERR "sdcardfs: creating kthread failed\n"); + kfree(pkgl_dat); + return packagelist_thread; + } + pkgl_dat->thread_id = packagelist_thread; + + printk(KERN_INFO "sdcardfs: created packagelist pkgld/%d\n", + (int)pkgl_dat->thread_id->pid); + + return (void *)pkgl_dat; +} + +void packagelist_destroy(void *pkgl_id) +{ + struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; + pid_t pkgl_pid = pkgl_dat->thread_id->pid; + + force_sig_info(SIGINT, SEND_SIG_PRIV, pkgl_dat->thread_id); + kthread_stop(pkgl_dat->thread_id); + remove_all_hashentrys(pkgl_dat); + printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld/%d\n", (int)pkgl_pid); + kfree(pkgl_dat); +} + +int packagelist_init(void) +{ + hashtable_entry_cachep = + kmem_cache_create("packagelist_hashtable_entry", + sizeof(struct hashtable_entry), 0, 0, NULL); + if (!hashtable_entry_cachep) { + printk(KERN_ERR "sdcardfs: failed creating pkgl_hashtable entry slab cache\n"); + return -ENOMEM; + } + + return 0; +} + +void packagelist_exit(void) +{ + if (hashtable_entry_cachep) + kmem_cache_destroy(hashtable_entry_cachep); +} + + diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h new file mode 100644 index 000000000000..90f8b24e4a52 --- /dev/null +++ b/fs/sdcardfs/sdcardfs.h @@ -0,0 +1,493 @@ +/* + * fs/sdcardfs/sdcardfs.h + * + * The sdcardfs v2.0 + * This file system replaces the sdcard daemon on Android + * On version 2.0, some of the daemon functions have been ported + * to support the multi-user concepts of Android 4.4 + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#ifndef _SDCARDFS_H_ +#define _SDCARDFS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "multiuser.h" + +/* the file system name */ +#define SDCARDFS_NAME "sdcardfs" + +/* sdcardfs root inode number */ +#define SDCARDFS_ROOT_INO 1 + +/* useful for tracking code reachability */ +#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) + +#define SDCARDFS_DIRENT_SIZE 256 + +/* temporary static uid settings for development */ +#define AID_ROOT 0 /* uid for accessing /mnt/sdcard & extSdcard */ +#define AID_MEDIA_RW 1023 /* internal media storage write access */ + +#define AID_SDCARD_RW 1015 /* external storage write access */ +#define AID_SDCARD_R 1028 /* external storage read access */ +#define AID_SDCARD_PICS 1033 /* external storage photos access */ +#define AID_SDCARD_AV 1034 /* external storage audio/video access */ +#define AID_SDCARD_ALL 1035 /* access all users external storage */ + +#define AID_PACKAGE_INFO 1027 + +#define fix_derived_permission(x) \ + do { \ + (x)->i_uid = SDCARDFS_I(x)->d_uid; \ + (x)->i_gid = SDCARDFS_I(x)->d_gid; \ + (x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\ + } while (0) + +/* OVERRIDE_CRED() and REVERT_CRED() + * OVERRID_CRED() + * backup original task->cred + * and modifies task->cred->fsuid/fsgid to specified value. + * REVERT_CRED() + * restore original task->cred->fsuid/fsgid. + * These two macro should be used in pair, and OVERRIDE_CRED() should be + * placed at the beginning of a function, right after variable declaration. + */ +#define OVERRIDE_CRED(sdcardfs_sbi, saved_cred) \ + saved_cred = override_fsids(sdcardfs_sbi); \ + if (!saved_cred) { return -ENOMEM; } + +#define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred) \ + saved_cred = override_fsids(sdcardfs_sbi); \ + if (!saved_cred) { return ERR_PTR(-ENOMEM); } + +#define REVERT_CRED(saved_cred) revert_fsids(saved_cred) + +#define DEBUG_CRED() \ + printk("KAKJAGI: %s:%d fsuid %d fsgid %d\n", \ + __FUNCTION__, __LINE__, \ + (int)current->cred->fsuid, \ + (int)current->cred->fsgid); + +/* Android 4.4 support */ + +/* Permission mode for a specific node. Controls how file permissions + * are derived for children nodes. */ +typedef enum { + /* Nothing special; this node should just inherit from its parent. */ + PERM_INHERIT, + /* This node is one level above a normal root; used for legacy layouts + * which use the first level to represent user_id. */ + PERM_LEGACY_PRE_ROOT, + /* This node is "/" */ + PERM_ROOT, + /* This node is "/Android" */ + PERM_ANDROID, + /* This node is "/Android/data" */ + PERM_ANDROID_DATA, + /* This node is "/Android/obb" */ + PERM_ANDROID_OBB, + /* This node is "/Android/user" */ + PERM_ANDROID_USER, +} perm_t; + +/* Permissions structure to derive */ +typedef enum { + DERIVE_NONE, + DERIVE_LEGACY, + DERIVE_UNIFIED, +} derive_t; + +typedef enum { + LOWER_FS_EXT4, + LOWER_FS_FAT, +} lower_fs_t; + +struct sdcardfs_sb_info; +struct sdcardfs_mount_options; + +/* Do not directly use this function. Use OVERRIDE_CRED() instead. */ +const struct cred * override_fsids(struct sdcardfs_sb_info* sbi); +/* Do not directly use this function, use REVERT_CRED() instead. */ +void revert_fsids(const struct cred * old_cred); + +/* operations vectors defined in specific files */ +extern const struct file_operations sdcardfs_main_fops; +extern const struct file_operations sdcardfs_dir_fops; +extern const struct inode_operations sdcardfs_main_iops; +extern const struct inode_operations sdcardfs_dir_iops; +extern const struct inode_operations sdcardfs_symlink_iops; +extern const struct super_operations sdcardfs_sops; +extern const struct dentry_operations sdcardfs_ci_dops; +extern const struct address_space_operations sdcardfs_aops, sdcardfs_dummy_aops; +extern const struct vm_operations_struct sdcardfs_vm_ops; + +extern int sdcardfs_init_inode_cache(void); +extern void sdcardfs_destroy_inode_cache(void); +extern int sdcardfs_init_dentry_cache(void); +extern void sdcardfs_destroy_dentry_cache(void); +extern int new_dentry_private_data(struct dentry *dentry); +extern void free_dentry_private_data(struct dentry *dentry); +extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd); +extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path); + +/* file private data */ +struct sdcardfs_file_info { + struct file *lower_file; + const struct vm_operations_struct *lower_vm_ops; +}; + +/* sdcardfs inode data in memory */ +struct sdcardfs_inode_info { + struct inode *lower_inode; + /* state derived based on current position in hierachy + * caution: d_mode does not include file types + */ + perm_t perm; + userid_t userid; + uid_t d_uid; + gid_t d_gid; + mode_t d_mode; + + struct inode vfs_inode; +}; + +/* sdcardfs dentry data in memory */ +struct sdcardfs_dentry_info { + spinlock_t lock; /* protects lower_path */ + struct path lower_path; + struct path orig_path; +}; + +struct sdcardfs_mount_options { + uid_t fs_low_uid; + gid_t fs_low_gid; + gid_t write_gid; + int split_perms; + derive_t derive; + lower_fs_t lower_fs; + unsigned int reserved_mb; +}; + +/* sdcardfs super-block data in memory */ +struct sdcardfs_sb_info { + struct super_block *lower_sb; + /* derived perm policy : some of options have been added + * to sdcardfs_mount_options (Android 4.4 support) */ + struct sdcardfs_mount_options options; + spinlock_t lock; /* protects obbpath */ + char *obbpath_s; + struct path obbpath; + void *pkgl_id; +}; + +/* + * inode to private data + * + * Since we use containers and the struct inode is _inside_ the + * sdcardfs_inode_info structure, SDCARDFS_I will always (given a non-NULL + * inode pointer), return a valid non-NULL pointer. + */ +static inline struct sdcardfs_inode_info *SDCARDFS_I(const struct inode *inode) +{ + return container_of(inode, struct sdcardfs_inode_info, vfs_inode); +} + +/* dentry to private data */ +#define SDCARDFS_D(dent) ((struct sdcardfs_dentry_info *)(dent)->d_fsdata) + +/* superblock to private data */ +#define SDCARDFS_SB(super) ((struct sdcardfs_sb_info *)(super)->s_fs_info) + +/* file to private Data */ +#define SDCARDFS_F(file) ((struct sdcardfs_file_info *)((file)->private_data)) + +/* file to lower file */ +static inline struct file *sdcardfs_lower_file(const struct file *f) +{ + return SDCARDFS_F(f)->lower_file; +} + +static inline void sdcardfs_set_lower_file(struct file *f, struct file *val) +{ + SDCARDFS_F(f)->lower_file = val; +} + +/* inode to lower inode. */ +static inline struct inode *sdcardfs_lower_inode(const struct inode *i) +{ + return SDCARDFS_I(i)->lower_inode; +} + +static inline void sdcardfs_set_lower_inode(struct inode *i, struct inode *val) +{ + SDCARDFS_I(i)->lower_inode = val; +} + +/* superblock to lower superblock */ +static inline struct super_block *sdcardfs_lower_super( + const struct super_block *sb) +{ + return SDCARDFS_SB(sb)->lower_sb; +} + +static inline void sdcardfs_set_lower_super(struct super_block *sb, + struct super_block *val) +{ + SDCARDFS_SB(sb)->lower_sb = val; +} + +/* path based (dentry/mnt) macros */ +static inline void pathcpy(struct path *dst, const struct path *src) +{ + dst->dentry = src->dentry; + dst->mnt = src->mnt; +} + +/* sdcardfs_get_pname functions calls path_get() + * therefore, the caller must call "proper" path_put functions + */ +#define SDCARDFS_DENT_FUNC(pname) \ +static inline void sdcardfs_get_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + pathcpy(pname, &SDCARDFS_D(dent)->pname); \ + path_get(pname); \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_put_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + path_put(pname); \ + return; \ +} \ +static inline void sdcardfs_set_##pname(const struct dentry *dent, \ + struct path *pname) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + pathcpy(&SDCARDFS_D(dent)->pname, pname); \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_reset_##pname(const struct dentry *dent) \ +{ \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + SDCARDFS_D(dent)->pname.dentry = NULL; \ + SDCARDFS_D(dent)->pname.mnt = NULL; \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} \ +static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ +{ \ + struct path pname; \ + spin_lock(&SDCARDFS_D(dent)->lock); \ + if(SDCARDFS_D(dent)->pname.dentry) { \ + pathcpy(&pname, &SDCARDFS_D(dent)->pname); \ + SDCARDFS_D(dent)->pname.dentry = NULL; \ + SDCARDFS_D(dent)->pname.mnt = NULL; \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + path_put(&pname); \ + } else \ + spin_unlock(&SDCARDFS_D(dent)->lock); \ + return; \ +} + +SDCARDFS_DENT_FUNC(lower_path) +SDCARDFS_DENT_FUNC(orig_path) + +static inline int has_graft_path(const struct dentry *dent) +{ + int ret = 0; + + spin_lock(&SDCARDFS_D(dent)->lock); + if (SDCARDFS_D(dent)->orig_path.dentry != NULL) + ret = 1; + spin_unlock(&SDCARDFS_D(dent)->lock); + + return ret; +} + +static inline void sdcardfs_get_real_lower(const struct dentry *dent, + struct path *real_lower) +{ + /* in case of a local obb dentry + * the orig_path should be returned + */ + if(has_graft_path(dent)) + sdcardfs_get_orig_path(dent, real_lower); + else + sdcardfs_get_lower_path(dent, real_lower); +} + +static inline void sdcardfs_put_real_lower(const struct dentry *dent, + struct path *real_lower) +{ + if(has_graft_path(dent)) + sdcardfs_put_orig_path(dent, real_lower); + else + sdcardfs_put_lower_path(dent, real_lower); +} + +/* for packagelist.c */ +extern int get_caller_has_rw_locked(void *pkgl_id, derive_t derive); +extern appid_t get_appid(void *pkgl_id, const char *app_name); +extern int check_caller_access_to_name(struct inode *parent_node, const char* name, + derive_t derive, int w_ok, int has_rw); +extern int open_flags_to_access_mode(int open_flags); +extern void * packagelist_create(gid_t write_gid); +extern void packagelist_destroy(void *pkgl_id); +extern int packagelist_init(void); +extern void packagelist_exit(void); + +/* for derived_perm.c */ +extern void setup_derived_state(struct inode *inode, perm_t perm, + userid_t userid, uid_t uid, gid_t gid, mode_t mode); +extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); +extern void update_derived_permission(struct dentry *dentry); +extern int need_graft_path(struct dentry *dentry); +extern int is_base_obbpath(struct dentry *dentry); +extern int is_obbpath_invalid(struct dentry *dentry); +extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path); + +/* locking helpers */ +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget_parent(dentry); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + return dir; +} + +static inline void unlock_dir(struct dentry *dir) +{ + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); +} + +static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t mode) +{ + int err; + struct dentry *dent; + struct iattr attrs; + struct nameidata nd; + + err = kern_path_parent(path_s, &nd); + if (err) { + if (err == -EEXIST) + err = 0; + goto out; + } + + dent = lookup_create(&nd, 1); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + if (err == -EEXIST) + err = 0; + goto out_unlock; + } + + err = vfs_mkdir(nd.path.dentry->d_inode, dent, mode); + if (err) { + if (err == -EEXIST) + err = 0; + goto out_dput; + } + + attrs.ia_uid = uid; + attrs.ia_gid = gid; + attrs.ia_valid = ATTR_UID | ATTR_GID; + mutex_lock(&dent->d_inode->i_mutex); + notify_change(dent, &attrs); + mutex_unlock(&dent->d_inode->i_mutex); + +out_dput: + dput(dent); + +out_unlock: + /* parent dentry locked by lookup_create */ + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); + +out: + return err; +} + +/* + * Return 1, if a disk has enough free space, otherwise 0. + * We assume that any files can not be overwritten. + */ +static inline int check_min_free_space(struct dentry *dentry, size_t size, int dir) +{ + int err; + struct path lower_path; + struct kstatfs statfs; + u64 avail; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + if (sbi->options.reserved_mb) { + /* Get fs stat of lower filesystem. */ + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, &statfs); + sdcardfs_put_lower_path(dentry, &lower_path); + + if (unlikely(err)) + return 0; + + /* Invalid statfs informations. */ + if (unlikely(statfs.f_bsize == 0)) + return 0; + + /* if you are checking directory, set size to f_bsize. */ + if (unlikely(dir)) + size = statfs.f_bsize; + + /* available size */ + avail = statfs.f_bavail * statfs.f_bsize; + + /* not enough space */ + if ((u64)size > avail) + return 0; + + /* enough space */ + if ((avail - size) > (sbi->options.reserved_mb * 1024 * 1024)) + return 1; + + return 0; + } else + return 1; +} + +#endif /* not _SDCARDFS_H_ */ diff --git a/fs/sdcardfs/strtok.h b/fs/sdcardfs/strtok.h new file mode 100644 index 000000000000..50ab25aa0bc4 --- /dev/null +++ b/fs/sdcardfs/strtok.h @@ -0,0 +1,75 @@ +/* + * fs/sdcardfs/strtok.h + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +static char * +strtok_r(char *s, const char *delim, char **last) +{ + char *spanp; + int c, sc; + char *tok; + + + /* if (s == NULL && (s = *last) == NULL) + return NULL; */ + if (s == NULL) { + s = *last; + if (s == NULL) + return NULL; + } + + /* + * Skip (span) leading delimiters (s += strspn(s, delim), sort of). + */ +cont: + c = *s++; + for (spanp = (char *)delim; (sc = *spanp++) != 0;) { + if (c == sc) + goto cont; + } + + if (c == 0) { /* no non-delimiter characters */ + *last = NULL; + return NULL; + } + tok = s - 1; + + /* + * Scan token (scan for delimiters: s += strcspn(s, delim), sort of). + * Note that delim must have one NUL; we stop if we see that, too. + */ + for (;;) { + c = *s++; + spanp = (char *)delim; + do { + sc = *spanp++; + if (sc == c) { + if (c == 0) + s = NULL; + else + s[-1] = 0; + *last = s; + return tok; + } + } while (sc != 0); + } + + /* NOTREACHED */ +} + diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c new file mode 100644 index 000000000000..1d206c82dfdf --- /dev/null +++ b/fs/sdcardfs/super.c @@ -0,0 +1,229 @@ +/* + * fs/sdcardfs/super.c + * + * Copyright (c) 2013 Samsung Electronics Co. Ltd + * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, + * Sunghwan Yun, Sungjong Seo + * + * This program has been developed as a stackable file system based on + * the WrapFS which written by + * + * Copyright (c) 1998-2011 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2011 Stony Brook University + * Copyright (c) 2003-2011 The Research Foundation of SUNY + * + * This file is dual licensed. It may be redistributed and/or modified + * under the terms of the Apache 2.0 License OR version 2 of the GNU + * General Public License. + */ + +#include "sdcardfs.h" + +/* + * The inode cache is used with alloc_inode for both our inode info and the + * vfs inode. + */ +static struct kmem_cache *sdcardfs_inode_cachep; + +/* final actions when unmounting a file system */ +static void sdcardfs_put_super(struct super_block *sb) +{ + struct sdcardfs_sb_info *spd; + struct super_block *s; + + spd = SDCARDFS_SB(sb); + if (!spd) + return; + + if(spd->obbpath_s) { + kfree(spd->obbpath_s); + path_put(&spd->obbpath); + } + + /* decrement lower super references */ + s = sdcardfs_lower_super(sb); + sdcardfs_set_lower_super(sb, NULL); + atomic_dec(&s->s_active); + + if(spd->pkgl_id) + packagelist_destroy(spd->pkgl_id); + + kfree(spd); + sb->s_fs_info = NULL; +} + +static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err; + struct path lower_path; + u32 min_blocks; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + sdcardfs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, buf); + sdcardfs_put_lower_path(dentry, &lower_path); + + if (sbi->options.reserved_mb) { + /* Invalid statfs informations. */ + if (buf->f_bsize == 0) { + printk(KERN_ERR "Returned block size is zero.\n"); + return -EINVAL; + } + + min_blocks = ((sbi->options.reserved_mb * 1024 * 1024)/buf->f_bsize); + buf->f_blocks -= min_blocks; + + if (buf->f_bavail > min_blocks) + buf->f_bavail -= min_blocks; + else + buf->f_bavail = 0; + + /* Make reserved blocks invisiable to media storage */ + buf->f_bfree = buf->f_bavail; + } + + /* set return buf to our f/s to avoid confusing user-level utils */ + buf->f_type = SDCARDFS_SUPER_MAGIC; + + return err; +} + +/* + * @flags: numeric mount options + * @options: mount options string + */ +static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options) +{ + int err = 0; + + /* + * The VFS will take care of "ro" and "rw" flags among others. We + * can safely accept a few flags (RDONLY, MANDLOCK), and honor + * SILENT, but anything else left over is an error. + */ + if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) { + printk(KERN_ERR + "sdcardfs: remount flags 0x%x unsupported\n", *flags); + err = -EINVAL; + } + + return err; +} + +/* + * Called by iput() when the inode reference count reached zero + * and the inode is not hashed anywhere. Used to clear anything + * that needs to be, before the inode is completely destroyed and put + * on the inode free list. + */ +static void sdcardfs_evict_inode(struct inode *inode) +{ + struct inode *lower_inode; + + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); + /* + * Decrement a reference to a lower_inode, which was incremented + * by our read_inode when it was created initially. + */ + lower_inode = sdcardfs_lower_inode(inode); + sdcardfs_set_lower_inode(inode, NULL); + iput(lower_inode); +} + +static struct inode *sdcardfs_alloc_inode(struct super_block *sb) +{ + struct sdcardfs_inode_info *i; + + i = kmem_cache_alloc(sdcardfs_inode_cachep, GFP_KERNEL); + if (!i) + return NULL; + + /* memset everything up to the inode to 0 */ + memset(i, 0, offsetof(struct sdcardfs_inode_info, vfs_inode)); + + i->vfs_inode.i_version = 1; + return &i->vfs_inode; +} + +static void sdcardfs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(sdcardfs_inode_cachep, SDCARDFS_I(inode)); +} + +/* sdcardfs inode cache constructor */ +static void init_once(void *obj) +{ + struct sdcardfs_inode_info *i = obj; + + inode_init_once(&i->vfs_inode); +} + +int sdcardfs_init_inode_cache(void) +{ + int err = 0; + + sdcardfs_inode_cachep = + kmem_cache_create("sdcardfs_inode_cache", + sizeof(struct sdcardfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT, init_once); + if (!sdcardfs_inode_cachep) + err = -ENOMEM; + return err; +} + +/* sdcardfs inode cache destructor */ +void sdcardfs_destroy_inode_cache(void) +{ + if (sdcardfs_inode_cachep) + kmem_cache_destroy(sdcardfs_inode_cachep); +} + +/* + * Used only in nfs, to kill any pending RPC tasks, so that subsequent + * code can actually succeed and won't leave tasks that need handling. + */ +static void sdcardfs_umount_begin(struct super_block *sb) +{ + struct super_block *lower_sb; + + lower_sb = sdcardfs_lower_super(sb); + if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin) + lower_sb->s_op->umount_begin(lower_sb); +} + +static int sdcardfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(mnt->mnt_sb); + struct sdcardfs_mount_options *opts = &sbi->options; + + if (opts->fs_low_uid != 0) + seq_printf(m, ",uid=%u", opts->fs_low_uid); + if (opts->fs_low_gid != 0) + seq_printf(m, ",gid=%u", opts->fs_low_gid); + + if (opts->derive == DERIVE_NONE) + seq_printf(m, ",derive=none"); + else if (opts->derive == DERIVE_LEGACY) + seq_printf(m, ",derive=legacy"); + else if (opts->derive == DERIVE_UNIFIED) + seq_printf(m, ",derive=unified"); + + if (opts->reserved_mb != 0) + seq_printf(m, ",reserved=%uMB", opts->reserved_mb); + + return 0; +}; + +const struct super_operations sdcardfs_sops = { + .put_super = sdcardfs_put_super, + .statfs = sdcardfs_statfs, + .remount_fs = sdcardfs_remount_fs, + .evict_inode = sdcardfs_evict_inode, + .umount_begin = sdcardfs_umount_begin, + .show_options = sdcardfs_show_options, + .alloc_inode = sdcardfs_alloc_inode, + .destroy_inode = sdcardfs_destroy_inode, + .drop_inode = generic_delete_inode, +}; diff --git a/include/linux/namei.h b/include/linux/namei.h index f29abda31e6d..e4f735dcee6e 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -44,6 +44,9 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 +#ifdef CONFIG_SDCARD_FS_CI_SEARCH +#define LOOKUP_CASE_INSENSITIVE 0x8000 +#endif extern int path_pts(struct path *path); diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 9bd559472c92..270b764f880e 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -53,6 +53,8 @@ #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" +#define SDCARDFS_SUPER_MAGIC 0xb550ca10 + #define SMB_SUPER_MAGIC 0x517B #define CGROUP_SUPER_MAGIC 0x27e0eb #define CGROUP2_SUPER_MAGIC 0x63677270 -- GitLab From d1d080c3325ab11d8c28f59b28e02d82e423db64 Mon Sep 17 00:00:00 2001 From: Daniel Campello Date: Mon, 20 Jul 2015 16:27:37 -0700 Subject: [PATCH 0986/1442] ANDROID: Port of sdcardfs to 4.4 Change-Id: I25b99ecf214e72ebf6a57ec3085972542a8d7951 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/Kconfig | 1 - fs/sdcardfs/dentry.c | 9 +- fs/sdcardfs/file.c | 47 ++++--- fs/sdcardfs/hashtable.h | 190 -------------------------- fs/sdcardfs/inode.c | 280 +++++++++++++++----------------------- fs/sdcardfs/lookup.c | 25 ++-- fs/sdcardfs/main.c | 113 ++++++++------- fs/sdcardfs/mmap.c | 5 +- fs/sdcardfs/packagelist.c | 39 +++--- fs/sdcardfs/sdcardfs.h | 41 +++--- fs/sdcardfs/super.c | 6 +- include/linux/namei.h | 2 + 12 files changed, 252 insertions(+), 506 deletions(-) delete mode 100644 fs/sdcardfs/hashtable.h diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig index 657f4958e8d6..d995f3eaae6d 100644 --- a/fs/sdcardfs/Kconfig +++ b/fs/sdcardfs/Kconfig @@ -1,6 +1,5 @@ config SDCARD_FS tristate "sdcard file system" - depends on EXPERIMENTAL default n help Sdcardfs is based on Wrapfs file system. diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 4572a5403bb2..dbbcfd091fc7 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -26,7 +26,7 @@ * 0: tell VFS to invalidate dentry * 1: dentry is valid */ -static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) { int err = 1; struct path parent_lower_path, lower_path; @@ -35,7 +35,7 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) struct dentry *lower_cur_parent_dentry = NULL; struct dentry *lower_dentry = NULL; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; spin_lock(&dentry->d_lock); @@ -119,7 +119,7 @@ static void sdcardfs_d_release(struct dentry *dentry) } static int sdcardfs_hash_ci(const struct dentry *dentry, - const struct inode *inode, struct qstr *qstr) + struct qstr *qstr) { /* * This function is copy of vfat_hashi. @@ -148,8 +148,7 @@ static int sdcardfs_hash_ci(const struct dentry *dentry, * Case insensitive compare of two vfat names. */ static int sdcardfs_cmp_ci(const struct dentry *parent, - const struct inode *pinode, - const struct dentry *dentry, const struct inode *inode, + const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { /* This function is copy of vfat_cmpi */ diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index bcacb947c874..f9c5eaafc619 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -50,8 +50,8 @@ static ssize_t sdcardfs_read(struct file *file, char __user *buf, err = vfs_read(lower_file, buf, count, ppos); /* update our inode atime upon a successful lower read */ if (err >= 0) - fsstack_copy_attr_atime(dentry->d_inode, - lower_file->f_path.dentry->d_inode); + fsstack_copy_attr_atime(d_inode(dentry), + file_inode(lower_file)); return err; } @@ -59,7 +59,7 @@ static ssize_t sdcardfs_read(struct file *file, char __user *buf, static ssize_t sdcardfs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - int err = 0; + int err; struct file *lower_file; struct dentry *dentry = file->f_path.dentry; @@ -73,29 +73,29 @@ static ssize_t sdcardfs_write(struct file *file, const char __user *buf, err = vfs_write(lower_file, buf, count, ppos); /* update our inode times+sizes upon a successful lower write */ if (err >= 0) { - fsstack_copy_inode_size(dentry->d_inode, - lower_file->f_path.dentry->d_inode); - fsstack_copy_attr_times(dentry->d_inode, - lower_file->f_path.dentry->d_inode); + fsstack_copy_inode_size(d_inode(dentry), + file_inode(lower_file)); + fsstack_copy_attr_times(d_inode(dentry), + file_inode(lower_file)); } return err; } -static int sdcardfs_readdir(struct file *file, void *dirent, filldir_t filldir) +static int sdcardfs_readdir(struct file *file, struct dir_context *ctx) { - int err = 0; + int err; struct file *lower_file = NULL; struct dentry *dentry = file->f_path.dentry; lower_file = sdcardfs_lower_file(file); lower_file->f_pos = file->f_pos; - err = vfs_readdir(lower_file, filldir, dirent); + err = iterate_dir(lower_file, ctx); file->f_pos = lower_file->f_pos; if (err >= 0) /* copy the atime */ - fsstack_copy_attr_atime(dentry->d_inode, - lower_file->f_path.dentry->d_inode); + fsstack_copy_attr_atime(d_inode(dentry), + file_inode(lower_file)); return err; } @@ -191,7 +191,6 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) */ file_accessed(file); vma->vm_ops = &sdcardfs_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */ if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */ @@ -242,8 +241,8 @@ static int sdcardfs_open(struct inode *inode, struct file *file) /* open lower object and link sdcardfs's file struct to lower's */ sdcardfs_get_lower_path(file->f_path.dentry, &lower_path); - lower_file = dentry_open(lower_path.dentry, lower_path.mnt, - file->f_flags, current_cred()); + lower_file = dentry_open(&lower_path, file->f_flags, current_cred()); + path_put(&lower_path); if (IS_ERR(lower_file)) { err = PTR_ERR(lower_file); lower_file = sdcardfs_lower_file(file); @@ -275,8 +274,10 @@ static int sdcardfs_flush(struct file *file, fl_owner_t id) struct file *lower_file = NULL; lower_file = sdcardfs_lower_file(file); - if (lower_file && lower_file->f_op && lower_file->f_op->flush) + if (lower_file && lower_file->f_op && lower_file->f_op->flush) { + filemap_write_and_wait(file->f_mapping); err = lower_file->f_op->flush(lower_file, id); + } return err; } @@ -296,19 +297,23 @@ static int sdcardfs_file_release(struct inode *inode, struct file *file) return 0; } -static int -sdcardfs_fsync(struct file *file, int datasync) +static int sdcardfs_fsync(struct file *file, loff_t start, loff_t end, + int datasync) { int err; struct file *lower_file; struct path lower_path; struct dentry *dentry = file->f_path.dentry; + err = __generic_file_fsync(file, start, end, datasync); + if (err) + goto out; + lower_file = sdcardfs_lower_file(file); sdcardfs_get_lower_path(dentry, &lower_path); - err = vfs_fsync(lower_file, datasync); + err = vfs_fsync_range(lower_file, start, end, datasync); sdcardfs_put_lower_path(dentry, &lower_path); - +out: return err; } @@ -344,7 +349,7 @@ const struct file_operations sdcardfs_main_fops = { const struct file_operations sdcardfs_dir_fops = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = sdcardfs_readdir, + .iterate = sdcardfs_readdir, .unlocked_ioctl = sdcardfs_unlocked_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = sdcardfs_compat_ioctl, diff --git a/fs/sdcardfs/hashtable.h b/fs/sdcardfs/hashtable.h deleted file mode 100644 index 1e770f3df148..000000000000 --- a/fs/sdcardfs/hashtable.h +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Statically sized hash table implementation - * (C) 2012 Sasha Levin - */ - -#ifndef _LINUX_HASHTABLE_H -#define _LINUX_HASHTABLE_H - -#include -#include -#include -#include -#include - -#define DEFINE_HASHTABLE(name, bits) \ - struct hlist_head name[1 << (bits)] = \ - { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } - -#define DECLARE_HASHTABLE(name, bits) \ - struct hlist_head name[1 << (bits)] - -#define HASH_SIZE(name) (ARRAY_SIZE(name)) -#define HASH_BITS(name) ilog2(HASH_SIZE(name)) - -/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ -#define hash_min(val, bits) \ - (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) - -static inline void __hash_init(struct hlist_head *ht, unsigned int sz) -{ - unsigned int i; - - for (i = 0; i < sz; i++) - INIT_HLIST_HEAD(&ht[i]); -} - -/** - * hash_init - initialize a hash table - * @hashtable: hashtable to be initialized - * - * Calculates the size of the hashtable from the given parameter, otherwise - * same as hash_init_size. - * - * This has to be a macro since HASH_BITS() will not work on pointers since - * it calculates the size during preprocessing. - */ -#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) - -/** - * hash_add - add an object to a hashtable - * @hashtable: hashtable to add to - * @node: the &struct hlist_node of the object to be added - * @key: the key of the object to be added - */ -#define hash_add(hashtable, node, key) \ - hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) - -/** - * hash_add_rcu - add an object to a rcu enabled hashtable - * @hashtable: hashtable to add to - * @node: the &struct hlist_node of the object to be added - * @key: the key of the object to be added - */ -#define hash_add_rcu(hashtable, node, key) \ - hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) - -/** - * hash_hashed - check whether an object is in any hashtable - * @node: the &struct hlist_node of the object to be checked - */ -static inline bool hash_hashed(struct hlist_node *node) -{ - return !hlist_unhashed(node); -} - -static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) -{ - unsigned int i; - - for (i = 0; i < sz; i++) - if (!hlist_empty(&ht[i])) - return false; - - return true; -} - -/** - * hash_empty - check whether a hashtable is empty - * @hashtable: hashtable to check - * - * This has to be a macro since HASH_BITS() will not work on pointers since - * it calculates the size during preprocessing. - */ -#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) - -/** - * hash_del - remove an object from a hashtable - * @node: &struct hlist_node of the object to remove - */ -static inline void hash_del(struct hlist_node *node) -{ - hlist_del_init(node); -} - -/** - * hash_del_rcu - remove an object from a rcu enabled hashtable - * @node: &struct hlist_node of the object to remove - */ -static inline void hash_del_rcu(struct hlist_node *node) -{ - hlist_del_init_rcu(node); -} - -/** - * hash_for_each - iterate over a hashtable - * @name: hashtable to iterate - * @bkt: integer to use as bucket loop cursor - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - */ -#define hash_for_each(name, bkt, obj, member, pos) \ - for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ - (bkt)++)\ - hlist_for_each_entry(obj, pos, &name[bkt], member) - -/** - * hash_for_each_rcu - iterate over a rcu enabled hashtable - * @name: hashtable to iterate - * @bkt: integer to use as bucket loop cursor - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - */ -#define hash_for_each_rcu(name, bkt, obj, member) \ - for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ - (bkt)++)\ - hlist_for_each_entry_rcu(obj, &name[bkt], member) - -/** - * hash_for_each_safe - iterate over a hashtable safe against removal of - * hash entry - * @name: hashtable to iterate - * @bkt: integer to use as bucket loop cursor - * @tmp: a &struct used for temporary storage - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - */ -#define hash_for_each_safe(name, bkt, tmp, obj, member, pos) \ - for ((bkt) = 0, obj = NULL; (bkt) < HASH_SIZE(name);\ - (bkt)++)\ - hlist_for_each_entry_safe(obj, pos, tmp, &name[bkt], member) - -/** - * hash_for_each_possible - iterate over all possible objects hashing to the - * same bucket - * @name: hashtable to iterate - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - * @key: the key of the objects to iterate over - */ -#define hash_for_each_possible(name, obj, member, key, pos) \ - hlist_for_each_entry(obj, pos, &name[hash_min(key, HASH_BITS(name))], member) - -/** - * hash_for_each_possible_rcu - iterate over all possible objects hashing to the - * same bucket in an rcu enabled hashtable - * in a rcu enabled hashtable - * @name: hashtable to iterate - * @obj: the type * to use as a loop cursor for each entry - * @member: the name of the hlist_node within the struct - * @key: the key of the objects to iterate over - */ -#define hash_for_each_possible_rcu(name, obj, member, key) \ - hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ - member) - -/** - * hash_for_each_possible_safe - iterate over all possible objects hashing to the - * same bucket safe against removals - * @name: hashtable to iterate - * @obj: the type * to use as a loop cursor for each entry - * @tmp: a &struct used for temporary storage - * @member: the name of the hlist_node within the struct - * @key: the key of the objects to iterate over - */ -#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ - hlist_for_each_entry_safe(obj, tmp,\ - &name[hash_min(key, HASH_BITS(name))], member) - - -#endif diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index e8ed04250ed1..75c622bac2f5 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -30,8 +30,8 @@ const struct cred * override_fsids(struct sdcardfs_sb_info* sbi) if (!cred) return NULL; - cred->fsuid = sbi->options.fs_low_uid; - cred->fsgid = sbi->options.fs_low_gid; + cred->fsuid = make_kuid(&init_user_ns, sbi->options.fs_low_uid); + cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid); old_cred = override_creds(cred); @@ -49,12 +49,12 @@ void revert_fsids(const struct cred * old_cred) } static int sdcardfs_create(struct inode *dir, struct dentry *dentry, - int mode, struct nameidata *nd) + umode_t mode, bool want_excl) { - int err = 0; + int err; struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; - struct path lower_path, saved_path; + struct path lower_path; struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; @@ -74,18 +74,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - - pathcpy(&saved_path, &nd->path); - pathcpy(&nd->path, &lower_path); - /* set last 16bytes of mode field to 0664 */ mode = (mode & S_IFMT) | 00664; - err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, nd); - - pathcpy(&nd->path, &saved_path); + err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) goto out; @@ -93,11 +84,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); REVERT_CRED(saved_cred); @@ -118,33 +107,27 @@ static int sdcardfs_link(struct dentry *old_dentry, struct inode *dir, OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb)); - file_size_save = i_size_read(old_dentry->d_inode); + file_size_save = i_size_read(d_inode(old_dentry)); sdcardfs_get_lower_path(old_dentry, &lower_old_path); sdcardfs_get_lower_path(new_dentry, &lower_new_path); lower_old_dentry = lower_old_path.dentry; lower_new_dentry = lower_new_path.dentry; lower_dir_dentry = lock_parent(lower_new_dentry); - err = mnt_want_write(lower_new_path.mnt); - if (err) - goto out_unlock; - - err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, - lower_new_dentry); - if (err || !lower_new_dentry->d_inode) + err = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry), + lower_new_dentry, NULL); + if (err || !d_inode(lower_new_dentry)) goto out; err = sdcardfs_interpose(new_dentry, dir->i_sb, &lower_new_path); if (err) goto out; - fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); - fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); - old_dentry->d_inode->i_nlink = - sdcardfs_lower_inode(old_dentry->d_inode)->i_nlink; - i_size_write(new_dentry->d_inode, file_size_save); + fsstack_copy_attr_times(dir, d_inode(lower_new_dentry)); + fsstack_copy_inode_size(dir, d_inode(lower_new_dentry)); + set_nlink(d_inode(old_dentry), + sdcardfs_lower_inode(d_inode(old_dentry))->i_nlink); + i_size_write(d_inode(new_dentry), file_size_save); out: - mnt_drop_write(lower_new_path.mnt); -out_unlock: unlock_dir(lower_dir_dentry); sdcardfs_put_lower_path(old_dentry, &lower_old_path); sdcardfs_put_lower_path(new_dentry, &lower_new_path); @@ -180,10 +163,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - err = vfs_unlink(lower_dir_inode, lower_dentry); + err = vfs_unlink(lower_dir_inode, lower_dentry, NULL); /* * Note: unlinking on top of NFS can cause silly-renamed files. @@ -198,13 +178,11 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) goto out; fsstack_copy_attr_times(dir, lower_dir_inode); fsstack_copy_inode_size(dir, lower_dir_inode); - dentry->d_inode->i_nlink = - sdcardfs_lower_inode(dentry->d_inode)->i_nlink; - dentry->d_inode->i_ctime = dir->i_ctime; + set_nlink(d_inode(dentry), + sdcardfs_lower_inode(d_inode(dentry))->i_nlink); + d_inode(dentry)->i_ctime = dir->i_ctime; d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */ out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_dir_dentry); dput(lower_dentry); sdcardfs_put_lower_path(dentry, &lower_path); @@ -217,7 +195,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - int err = 0; + int err; struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; struct path lower_path; @@ -228,21 +206,16 @@ static int sdcardfs_symlink(struct inode *dir, struct dentry *dentry, lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname); + err = vfs_symlink(d_inode(lower_parent_dentry), lower_dentry, symname); if (err) goto out; err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); REVERT_CRED(); @@ -266,9 +239,9 @@ static int touch(char *abs_path, mode_t mode) { return 0; } -static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = 0; + int err; int make_nomedia_in_obb = 0; struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; @@ -306,13 +279,9 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - /* set last 16bytes of mode field to 0775 */ mode = (mode & S_IFMT) | 00775; - err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, mode); + err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode); if (err) goto out; @@ -341,9 +310,9 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); /* update number of links on parent directory */ - dir->i_nlink = sdcardfs_lower_inode(dir)->i_nlink; + set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink); if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb")) && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) @@ -388,8 +357,6 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) kfree(nomedia_fullpath); } out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); out_revert: @@ -427,23 +394,18 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) lower_dentry = lower_path.dentry; lower_dir_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); + err = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry); if (err) goto out; d_drop(dentry); /* drop our dentry on success (why not VFS's job?) */ - if (dentry->d_inode) - clear_nlink(dentry->d_inode); - fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); - fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); - dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; + if (d_inode(dentry)) + clear_nlink(d_inode(dentry)); + fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry)); + fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry)); + set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink); out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_dir_dentry); sdcardfs_put_real_lower(dentry, &lower_path); REVERT_CRED(saved_cred); @@ -452,10 +414,10 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) } #if 0 -static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode, +static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int err = 0; + int err; struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; struct path lower_path; @@ -466,10 +428,7 @@ static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode, lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - err = mnt_want_write(lower_path.mnt); - if (err) - goto out_unlock; - err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev); + err = vfs_mknod(d_inode(lower_parent_dentry), lower_dentry, mode, dev); if (err) goto out; @@ -477,11 +436,9 @@ static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, int mode, if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); - fsstack_copy_inode_size(dir, lower_parent_dentry->d_inode); + fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); out: - mnt_drop_write(lower_path.mnt); -out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); REVERT_CRED(); @@ -541,43 +498,33 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } - err = mnt_want_write(lower_old_path.mnt); + err = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry, + d_inode(lower_new_dir_dentry), lower_new_dentry, + NULL, 0); if (err) goto out; - err = mnt_want_write(lower_new_path.mnt); - if (err) - goto out_drop_old_write; - - err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, - lower_new_dir_dentry->d_inode, lower_new_dentry); - if (err) - goto out_err; /* Copy attrs from lower dir, but i_uid/i_gid */ - fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); - fsstack_copy_inode_size(new_dir, lower_new_dir_dentry->d_inode); + fsstack_copy_attr_all(new_dir, d_inode(lower_new_dir_dentry)); + fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry)); fix_derived_permission(new_dir); if (new_dir != old_dir) { - fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); - fsstack_copy_inode_size(old_dir, lower_old_dir_dentry->d_inode); + fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry)); + fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry)); fix_derived_permission(old_dir); /* update the derived permission of the old_dentry * with its new parent */ new_parent = dget_parent(new_dentry); if(new_parent) { - if(old_dentry->d_inode) { + if(d_inode(old_dentry)) { get_derived_permission(new_parent, old_dentry); - fix_derived_permission(old_dentry->d_inode); + fix_derived_permission(d_inode(old_dentry)); } dput(new_parent); } } -out_err: - mnt_drop_write(lower_new_path.mnt); -out_drop_old_write: - mnt_drop_write(lower_old_path.mnt); out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); @@ -599,17 +546,17 @@ static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; - if (!lower_dentry->d_inode->i_op || - !lower_dentry->d_inode->i_op->readlink) { + if (!d_inode(lower_dentry)->i_op || + !d_inode(lower_dentry)->i_op->readlink) { err = -EINVAL; goto out; } - err = lower_dentry->d_inode->i_op->readlink(lower_dentry, + err = d_inode(lower_dentry)->i_op->readlink(lower_dentry, buf, bufsiz); if (err < 0) goto out; - fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); + fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry)); out: sdcardfs_put_lower_path(dentry, &lower_path); @@ -618,7 +565,7 @@ static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz #endif #if 0 -static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) { char *buf; int len = PAGE_SIZE, err; @@ -628,7 +575,7 @@ static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd) buf = kmalloc(len, GFP_KERNEL); if (!buf) { buf = ERR_PTR(-ENOMEM); - goto out; + return buf; } /* read the symlink, and then we will follow it */ @@ -642,35 +589,19 @@ static void *sdcardfs_follow_link(struct dentry *dentry, struct nameidata *nd) } else { buf[err] = '\0'; } -out: - nd_set_link(nd, buf); - return NULL; -} -#endif - -#if 0 -/* this @nd *IS* still used */ -static void sdcardfs_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *buf = nd_get_link(nd); - if (!IS_ERR(buf)) /* free the char* */ - kfree(buf); + return *cookie = buf; } #endif -static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags) +static int sdcardfs_permission(struct inode *inode, int mask) { int err; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - /* * Permission check on sdcardfs inode. * Calling process should have AID_SDCARD_RW permission */ - err = generic_permission(inode, mask, 0, inode->i_op->check_acl); + err = generic_permission(inode, mask); /* XXX * Original sdcardfs code calls inode_permission(lower_inode,.. ) @@ -700,49 +631,9 @@ static int sdcardfs_permission(struct inode *inode, int mask, unsigned int flags } -static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) -{ - struct dentry *lower_dentry; - struct inode *inode; - struct inode *lower_inode; - struct path lower_path; - struct dentry *parent; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); - - parent = dget_parent(dentry); - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, - sbi->options.derive, 0, 0)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); - dput(parent); - return -EACCES; - } - dput(parent); - - inode = dentry->d_inode; - - sdcardfs_get_lower_path(dentry, &lower_path); - lower_dentry = lower_path.dentry; - lower_inode = sdcardfs_lower_inode(inode); - - fsstack_copy_attr_all(inode, lower_inode); - fsstack_copy_inode_size(inode, lower_inode); - /* if the dentry has been moved from other location - * so, on this stage, its derived permission must be - * rechecked from its private field. - */ - fix_derived_permission(inode); - - generic_fillattr(inode, stat); - sdcardfs_put_lower_path(dentry, &lower_path); - return 0; -} - static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) { - int err = 0; + int err; struct dentry *lower_dentry; struct inode *inode; struct inode *lower_inode; @@ -752,7 +643,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) struct dentry *parent; int has_rw; - inode = dentry->d_inode; + inode = d_inode(dentry); /* * Check if user has permission to change inode. We don't check if @@ -766,7 +657,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) /* check the Android group ID */ has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); parent = dget_parent(dentry); - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name, sbi->options.derive, 1, has_rw)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", @@ -819,13 +710,14 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) /* notify the (possibly copied-up) lower inode */ /* - * Note: we use lower_dentry->d_inode, because lower_inode may be + * Note: we use d_inode(lower_dentry), because lower_inode may be * unlinked (no inode->i_sb and i_ino==0. This happens if someone * tries to open(), unlink(), then ftruncate() a file. */ - mutex_lock(&lower_dentry->d_inode->i_mutex); - err = notify_change(lower_dentry, &lower_ia); /* note: lower_ia */ - mutex_unlock(&lower_dentry->d_inode->i_mutex); + mutex_lock(&d_inode(lower_dentry)->i_mutex); + err = notify_change(lower_dentry, &lower_ia, /* note: lower_ia */ + NULL); + mutex_unlock(&d_inode(lower_dentry)->i_mutex); if (current->mm) up_write(¤t->mm->mmap_sem); if (err) @@ -848,6 +740,46 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) return err; } +static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + struct path lower_path; + struct dentry *parent; + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); + + parent = dget_parent(dentry); + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name, + sbi->options.derive, 0, 0)) { + printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" + " dentry: %s, task:%s\n", + __func__, dentry->d_name.name, current->comm); + dput(parent); + return -EACCES; + } + dput(parent); + + inode = d_inode(dentry); + + sdcardfs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + lower_inode = sdcardfs_lower_inode(inode); + + fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + /* if the dentry has been moved from other location + * so, on this stage, its derived permission must be + * rechecked from its private field. + */ + fix_derived_permission(inode); + + generic_fillattr(inode, stat); + sdcardfs_put_lower_path(dentry, &lower_path); + return 0; +} + const struct inode_operations sdcardfs_symlink_iops = { .permission = sdcardfs_permission, .setattr = sdcardfs_setattr, @@ -856,14 +788,16 @@ const struct inode_operations sdcardfs_symlink_iops = { * These methods are *NOT* perfectly tested. .readlink = sdcardfs_readlink, .follow_link = sdcardfs_follow_link, - .put_link = sdcardfs_put_link, + .put_link = kfree_put_link, */ }; const struct inode_operations sdcardfs_dir_iops = { .create = sdcardfs_create, .lookup = sdcardfs_lookup, +#if 0 .permission = sdcardfs_permission, +#endif .unlink = sdcardfs_unlink, .mkdir = sdcardfs_mkdir, .rmdir = sdcardfs_rmdir, diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index c0b12375b1bf..a4b94df99f32 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -79,8 +79,7 @@ static int sdcardfs_inode_set(struct inode *inode, void *lower_inode) return 0; } -static struct inode *sdcardfs_iget(struct super_block *sb, - struct inode *lower_inode) +struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) { struct sdcardfs_inode_info *info; struct inode *inode; /* the new inode to return */ @@ -206,14 +205,13 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, * Fills in lower_parent_path with on success. */ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, - struct nameidata *nd, struct path *lower_parent_path) + unsigned int flags, struct path *lower_parent_path) { int err = 0; struct vfsmount *lower_dir_mnt; struct dentry *lower_dir_dentry = NULL; struct dentry *lower_dentry; const char *name; - struct nameidata lower_nd; struct path lower_path; struct qstr this; struct sdcardfs_sb_info *sbi; @@ -234,10 +232,10 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, /* Use vfs_path_lookup to check if the dentry exists or not */ if (sbi->options.lower_fs == LOWER_FS_EXT4) { err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, - LOOKUP_CASE_INSENSITIVE, &lower_nd); + LOOKUP_CASE_INSENSITIVE, &lower_path); } else if (sbi->options.lower_fs == LOWER_FS_FAT) { err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, - &lower_nd); + &lower_path); } /* no error: handle positive dentries */ @@ -253,7 +251,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, * and the base obbpath will be copyed to the lower_path variable. * if an error returned, there's no change in the lower_path * returns: -ERRNO if error (0: no error) */ - err = setup_obb_dentry(dentry, &lower_nd.path); + err = setup_obb_dentry(dentry, &lower_path); if(err) { /* if the sbi->obbpath is not available, we can optionally @@ -267,8 +265,8 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, } } - sdcardfs_set_lower_path(dentry, &lower_nd.path); - err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_nd.path); + sdcardfs_set_lower_path(dentry, &lower_path); + err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path); if (err) /* path_put underlying path on error */ sdcardfs_put_reset_lower_path(dentry); goto out; @@ -306,10 +304,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, * the VFS will continue the process of making this negative dentry * into a positive one. */ - if (nd) { - if (nd->flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) - err = 0; - } else + if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET)) err = 0; out: @@ -328,7 +323,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, * @nd : nameidata of parent inode */ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *ret = NULL, *parent; struct path lower_parent_path; @@ -359,7 +354,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - ret = __sdcardfs_lookup(dentry, nd, &lower_parent_path); + ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path); if (IS_ERR(ret)) { goto out; diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 1fdceffec72c..9d04ae8ceb46 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -156,6 +156,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, return 0; } +#if 0 /* * our custom d_alloc_root work-alike * @@ -181,6 +182,7 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) } return ret; } +#endif /* * There is no need to lock the sdcardfs_super_info's rwsem as there is no @@ -195,6 +197,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, struct path lower_path; struct sdcardfs_sb_info *sb_info; void *pkgl_id; + struct inode *inode; printk(KERN_INFO "sdcardfs version 2.0\n"); @@ -259,12 +262,18 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb->s_magic = SDCARDFS_SUPER_MAGIC; sb->s_op = &sdcardfs_sops; - /* see comment next to the definition of sdcardfs_d_alloc_root */ - sb->s_root = sdcardfs_d_alloc_root(sb); + /* get a new inode and allocate our root dentry */ + inode = sdcardfs_iget(sb, lower_path.dentry->d_inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_sput; + } + sb->s_root = d_make_root(inode); if (!sb->s_root) { err = -ENOMEM; - goto out_sput; + goto out_iput; } + d_set_d_op(sb->s_root, &sdcardfs_ci_dops); /* link the upper and lower dentries */ sb->s_root->d_fsdata = NULL; @@ -275,56 +284,60 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, /* set the lower dentries for s_root */ sdcardfs_set_lower_path(sb->s_root, &lower_path); - /* call interpose to create the upper level inode */ - err = sdcardfs_interpose(sb->s_root, sb, &lower_path); - if (!err) { - /* setup permission policy */ - switch(sb_info->options.derive) { - case DERIVE_NONE: - setup_derived_state(sb->s_root->d_inode, - PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775); - sb_info->obbpath_s = NULL; - break; - case DERIVE_LEGACY: - /* Legacy behavior used to support internal multiuser layout which - * places user_id at the top directory level, with the actual roots - * just below that. Shared OBB path is also at top level. */ - setup_derived_state(sb->s_root->d_inode, - PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); - /* initialize the obbpath string and lookup the path - * sb_info->obb_path will be deactivated by path_put - * on sdcardfs_put_super */ - sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); - snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); - err = prepare_dir(sb_info->obbpath_s, - sb_info->options.fs_low_uid, - sb_info->options.fs_low_gid, 00755); - if(err) - printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n", - __func__,__LINE__, sb_info->obbpath_s); - break; - case DERIVE_UNIFIED: - /* Unified multiuser layout which places secondary user_id under - * /Android/user and shared OBB path under /Android/obb. */ - setup_derived_state(sb->s_root->d_inode, - PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); - - sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); - snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); - break; - } - fix_derived_permission(sb->s_root->d_inode); + /* + * No need to call interpose because we already have a positive + * dentry, which was instantiated by d_make_root. Just need to + * d_rehash it. + */ + d_rehash(sb->s_root); - if (!silent) - printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", - dev_name, lower_sb->s_type->name); - goto out; + /* setup permission policy */ + switch(sb_info->options.derive) { + case DERIVE_NONE: + setup_derived_state(sb->s_root->d_inode, + PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775); + sb_info->obbpath_s = NULL; + break; + case DERIVE_LEGACY: + /* Legacy behavior used to support internal multiuser layout which + * places user_id at the top directory level, with the actual roots + * just below that. Shared OBB path is also at top level. */ + setup_derived_state(sb->s_root->d_inode, + PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); + /* initialize the obbpath string and lookup the path + * sb_info->obb_path will be deactivated by path_put + * on sdcardfs_put_super */ + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); + err = prepare_dir(sb_info->obbpath_s, + sb_info->options.fs_low_uid, + sb_info->options.fs_low_gid, 00755); + if(err) + printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n", + __func__,__LINE__, sb_info->obbpath_s); + break; + case DERIVE_UNIFIED: + /* Unified multiuser layout which places secondary user_id under + * /Android/user and shared OBB path under /Android/obb. */ + setup_derived_state(sb->s_root->d_inode, + PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); + + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); + break; } - /* else error: fall through */ + fix_derived_permission(sb->s_root->d_inode); + + if (!silent) + printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", + dev_name, lower_sb->s_type->name); + goto out; /* all is well */ - free_dentry_private_data(sb->s_root); + /* no longer needed: free_dentry_private_data(sb->s_root); */ out_freeroot: dput(sb->s_root); +out_iput: + iput(inode); out_sput: /* drop refs we took earlier */ atomic_dec(&lower_sb->s_active); @@ -346,7 +359,7 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, { int error; - struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); @@ -378,7 +391,7 @@ static struct file_system_type sdcardfs_fs_type = { .name = SDCARDFS_NAME, .mount = sdcardfs_mount, .kill_sb = generic_shutdown_super, - .fs_flags = FS_REVAL_DOT, + .fs_flags = 0, }; static int __init init_sdcardfs_fs(void) diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c index c807d7f18f8b..e21f64675a80 100644 --- a/fs/sdcardfs/mmap.c +++ b/fs/sdcardfs/mmap.c @@ -48,9 +48,8 @@ static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return err; } -static ssize_t sdcardfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) +static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, + struct iov_iter *iter, loff_t pos) { /* * This function returns zero on purpose in order to support direct IO. diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index c786d8f92203..d7ba8d4a423e 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -20,7 +20,7 @@ #include "sdcardfs.h" #include "strtok.h" -#include "hashtable.h" +#include #include #include #include @@ -29,8 +29,8 @@ #define STRING_BUF_SIZE (512) struct hashtable_entry { - struct hlist_node hlist; - void *key; + struct hlist_node hlist; + void *key; int value; }; @@ -67,12 +67,12 @@ static unsigned int str_hash(void *key) { } static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) { - struct hashtable_entry *hash_cur; - struct hlist_node *h_n; + struct hashtable_entry *hash_cur; + + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid) - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid, h_n) - if (appid == hash_cur->key) - return 1; + if (appid == hash_cur->key) + return 1; return 0; } @@ -87,7 +87,7 @@ int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) { return 1; } - appid = multiuser_get_app_id(current_fsuid()); + appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid())); mutex_lock(&pkgl_dat->hashtable_lock); ret = contain_appid_key(pkgl_dat, (void *)appid); mutex_unlock(&pkgl_dat->hashtable_lock); @@ -98,13 +98,12 @@ appid_t get_appid(void *pkgl_id, const char *app_name) { struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; struct hashtable_entry *hash_cur; - struct hlist_node *h_n; unsigned int hash = str_hash((void *)app_name); appid_t ret_id; //printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash); mutex_lock(&pkgl_dat->hashtable_lock); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) { + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { //printk(KERN_INFO "sdcardfs: %s: %s\n", __func__, (char *)hash_cur->key); if (!strcasecmp(app_name, hash_cur->key)) { ret_id = (appid_t)hash_cur->value; @@ -140,7 +139,7 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name, /* Root always has access; access for any other UIDs should always * be controlled through packages.list. */ - if (current_fsuid() == 0) { + if (from_kuid(&init_user_ns, current_fsuid()) == 0) { return 1; } @@ -148,7 +147,8 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name, * parent or holds sdcard_rw. */ if (w_ok) { if (parent_node && - (current_fsuid() == SDCARDFS_I(parent_node)->d_uid)) { + (from_kuid(&init_user_ns, current_fsuid()) == + SDCARDFS_I(parent_node)->d_uid)) { return 1; } return has_rw; @@ -174,11 +174,10 @@ int open_flags_to_access_mode(int open_flags) { static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) { struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; - struct hlist_node *h_n; unsigned int hash = str_hash(key); //printk(KERN_INFO "sdcardfs: %s: %s: %d, %u\n", __func__, (char *)key, value, hash); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash, h_n) { + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(key, hash_cur->key)) { hash_cur->value = value; return 0; @@ -202,11 +201,10 @@ static void remove_str_to_int(struct hashtable_entry *h_entry) { static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) { struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; - struct hlist_node *h_n; //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value); hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, - (unsigned int)key, h_n) { + (unsigned int)key) { if (key == hash_cur->key) { hash_cur->value = value; return 0; @@ -230,14 +228,13 @@ static void remove_int_to_null(struct hashtable_entry *h_entry) { static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) { struct hashtable_entry *hash_cur; - struct hlist_node *h_n; struct hlist_node *h_t; int i; - hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist, h_n) + hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist) remove_str_to_int(hash_cur); - hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist, h_n) - remove_int_to_null(hash_cur); + hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist) + remove_int_to_null(hash_cur); hash_init(pkgl_dat->package_to_appid); hash_init(pkgl_dat->appid_with_rw); diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 90f8b24e4a52..51f6c7912584 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -69,8 +69,8 @@ #define fix_derived_permission(x) \ do { \ - (x)->i_uid = SDCARDFS_I(x)->d_uid; \ - (x)->i_gid = SDCARDFS_I(x)->d_gid; \ + (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \ + (x)->i_gid = make_kgid(&init_user_ns, SDCARDFS_I(x)->d_gid); \ (x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\ } while (0) @@ -159,7 +159,9 @@ extern void sdcardfs_destroy_dentry_cache(void); extern int new_dentry_private_data(struct dentry *dentry); extern void free_dentry_private_data(struct dentry *dentry); extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd); + unsigned int flags); +extern struct inode *sdcardfs_iget(struct super_block *sb, + struct inode *lower_inode); extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, struct path *lower_path); @@ -387,13 +389,13 @@ extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path); static inline struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir = dget_parent(dentry); - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); return dir; } static inline void unlock_dir(struct dentry *dir) { - mutex_unlock(&dir->d_inode->i_mutex); + mutex_unlock(&d_inode(dir)->i_mutex); dput(dir); } @@ -402,16 +404,9 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m int err; struct dentry *dent; struct iattr attrs; - struct nameidata nd; + struct path parent; - err = kern_path_parent(path_s, &nd); - if (err) { - if (err == -EEXIST) - err = 0; - goto out; - } - - dent = lookup_create(&nd, 1); + dent = kern_path_locked(path_s, &parent); if (IS_ERR(dent)) { err = PTR_ERR(dent); if (err == -EEXIST) @@ -419,29 +414,27 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m goto out_unlock; } - err = vfs_mkdir(nd.path.dentry->d_inode, dent, mode); + err = vfs_mkdir(d_inode(parent.dentry), dent, mode); if (err) { if (err == -EEXIST) err = 0; goto out_dput; } - attrs.ia_uid = uid; - attrs.ia_gid = gid; + attrs.ia_uid = make_kuid(&init_user_ns, uid); + attrs.ia_gid = make_kgid(&init_user_ns, gid); attrs.ia_valid = ATTR_UID | ATTR_GID; - mutex_lock(&dent->d_inode->i_mutex); - notify_change(dent, &attrs); - mutex_unlock(&dent->d_inode->i_mutex); + mutex_lock(&d_inode(dent)->i_mutex); + notify_change(dent, &attrs, NULL); + mutex_unlock(&d_inode(dent)->i_mutex); out_dput: dput(dent); out_unlock: /* parent dentry locked by lookup_create */ - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); - -out: + mutex_unlock(&d_inode(parent.dentry)->i_mutex); + path_put(&parent); return err; } diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index 1d206c82dfdf..f153ce1b8cf3 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -122,7 +122,7 @@ static void sdcardfs_evict_inode(struct inode *inode) struct inode *lower_inode; truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); /* * Decrement a reference to a lower_inode, which was incremented * by our read_inode when it was created initially. @@ -193,9 +193,9 @@ static void sdcardfs_umount_begin(struct super_block *sb) lower_sb->s_op->umount_begin(lower_sb); } -static int sdcardfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static int sdcardfs_show_options(struct seq_file *m, struct dentry *root) { - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(mnt->mnt_sb); + struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb); struct sdcardfs_mount_options *opts = &sbi->options; if (opts->fs_low_uid != 0) diff --git a/include/linux/namei.h b/include/linux/namei.h index e4f735dcee6e..b3cb8dbc623d 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -81,6 +81,8 @@ extern struct dentry *user_path_create(int, const char __user *, struct path *, extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); +extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + const char *, unsigned int, struct path *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); -- GitLab From 5d3b41622694598b6247ab423b35e3e7a1824db3 Mon Sep 17 00:00:00 2001 From: Daniel Campello Date: Mon, 20 Jul 2015 16:33:46 -0700 Subject: [PATCH 0987/1442] ANDROID: Changed type-casting in packagelist management Fixed existing type-casting in packagelist management code. All warnings at compile time were taken care of. Change-Id: I1ea97786d1d1325f31b9f09ae966af1f896a2af5 Signed-off-by: Daniel Campello --- fs/sdcardfs/packagelist.c | 40 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index d7ba8d4a423e..f11591da141d 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -31,7 +31,7 @@ struct hashtable_entry { struct hlist_node hlist; void *key; - int value; + unsigned int value; }; struct packagelist_data { @@ -54,7 +54,7 @@ static const char* const kpackageslist_file = "/data/system/packages.list"; /* Supplementary groups to execute with */ static const gid_t kgroups[1] = { AID_PACKAGE_INFO }; -static unsigned int str_hash(void *key) { +static unsigned int str_hash(const char *key) { int i; unsigned int h = strlen(key); char *data = (char *)key; @@ -66,13 +66,13 @@ static unsigned int str_hash(void *key) { return h; } -static int contain_appid_key(struct packagelist_data *pkgl_dat, void *appid) { +static int contain_appid_key(struct packagelist_data *pkgl_dat, unsigned int appid) { struct hashtable_entry *hash_cur; - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, (unsigned int)appid) - - if (appid == hash_cur->key) + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, appid) + if ((void *)(uintptr_t)appid == hash_cur->key) return 1; + return 0; } @@ -89,7 +89,7 @@ int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) { appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid())); mutex_lock(&pkgl_dat->hashtable_lock); - ret = contain_appid_key(pkgl_dat, (void *)appid); + ret = contain_appid_key(pkgl_dat, appid); mutex_unlock(&pkgl_dat->hashtable_lock); return ret; } @@ -98,7 +98,7 @@ appid_t get_appid(void *pkgl_id, const char *app_name) { struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; struct hashtable_entry *hash_cur; - unsigned int hash = str_hash((void *)app_name); + unsigned int hash = str_hash(app_name); appid_t ret_id; //printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash); @@ -171,7 +171,9 @@ int open_flags_to_access_mode(int open_flags) { } } -static int insert_str_to_int(struct packagelist_data *pkgl_dat, void *key, int value) { +static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, + unsigned int value) +{ struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; unsigned int hash = str_hash(key); @@ -198,14 +200,15 @@ static void remove_str_to_int(struct hashtable_entry *h_entry) { kmem_cache_free(hashtable_entry_cachep, h_entry); } -static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int value) { +static int insert_int_to_null(struct packagelist_data *pkgl_dat, unsigned int key, + unsigned int value) +{ struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value); - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, - (unsigned int)key) { - if (key == hash_cur->key) { + hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, key) { + if ((void *)(uintptr_t)key == hash_cur->key) { hash_cur->value = value; return 0; } @@ -213,10 +216,9 @@ static int insert_int_to_null(struct packagelist_data *pkgl_dat, void *key, int new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); if (!new_entry) return -ENOMEM; - new_entry->key = key; + new_entry->key = (void *)(uintptr_t)key; new_entry->value = value; - hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, - (unsigned int)new_entry->key); + hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, key); return 0; } @@ -260,7 +262,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) { while ((read_amount = sys_read(fd, pkgl_dat->read_buf, sizeof(pkgl_dat->read_buf))) > 0) { - int appid; + unsigned int appid; char *token; int one_line_len = 0; int additional_read; @@ -277,7 +279,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) { if (additional_read > 0) sys_lseek(fd, -additional_read, SEEK_CUR); - if (sscanf(pkgl_dat->read_buf, "%s %d %*d %*s %*s %s", + if (sscanf(pkgl_dat->read_buf, "%s %u %*d %*s %*s %s", pkgl_dat->app_name_buf, &appid, pkgl_dat->gids_buf) == 3) { ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid); @@ -291,7 +293,7 @@ static int read_package_list(struct packagelist_data *pkgl_dat) { while (token != NULL) { if (!kstrtoul(token, 10, &ret_gid) && (ret_gid == pkgl_dat->write_gid)) { - ret = insert_int_to_null(pkgl_dat, (void *)appid, 1); + ret = insert_int_to_null(pkgl_dat, appid, 1); if (ret) { sys_close(fd); mutex_unlock(&pkgl_dat->hashtable_lock); -- GitLab From 497ac90d42ff00723021280b95a3f41c4ba114be Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 3 Feb 2016 21:08:21 -0800 Subject: [PATCH 0988/1442] ANDROID: sdcardfs: Bring up to date with Android M permissions: In M, the workings of sdcardfs were changed significantly. This brings sdcardfs into line with the changes. Change-Id: I10e91a84a884c838feef7aa26c0a2b21f02e052e --- fs/sdcardfs/Kconfig | 1 + fs/sdcardfs/derived_perm.c | 119 ++++----- fs/sdcardfs/file.c | 10 +- fs/sdcardfs/inode.c | 78 +++--- fs/sdcardfs/lookup.c | 40 +-- fs/sdcardfs/main.c | 141 +++++------ fs/sdcardfs/packagelist.c | 504 ++++++++++++++++++------------------- fs/sdcardfs/sdcardfs.h | 134 ++++++---- fs/sdcardfs/strtok.h | 75 ------ fs/sdcardfs/super.c | 11 +- 10 files changed, 501 insertions(+), 612 deletions(-) delete mode 100644 fs/sdcardfs/strtok.h diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig index d995f3eaae6d..ab25f88ebb37 100644 --- a/fs/sdcardfs/Kconfig +++ b/fs/sdcardfs/Kconfig @@ -1,5 +1,6 @@ config SDCARD_FS tristate "sdcard file system" + depends on CONFIGFS_FS default n help Sdcardfs is based on Wrapfs file system. diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 00c33a471dcc..128b3e56851f 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -29,24 +29,23 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) ci->perm = PERM_INHERIT; ci->userid = pi->userid; ci->d_uid = pi->d_uid; - ci->d_gid = pi->d_gid; - ci->d_mode = pi->d_mode; + ci->under_android = pi->under_android; } /* helper function for derived state */ void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, gid_t gid, mode_t mode) + userid_t userid, uid_t uid, bool under_android) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); info->perm = perm; info->userid = userid; info->d_uid = uid; - info->d_gid = gid; - info->d_mode = mode; + info->under_android = under_android; } -void get_derived_permission(struct dentry *parent, struct dentry *dentry) +/* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ +void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry) { struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode); @@ -63,86 +62,68 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry) inherit_derived_state(parent->d_inode, dentry->d_inode); - //printk(KERN_INFO "sdcardfs: derived: %s, %s, %d\n", parent->d_name.name, - // dentry->d_name.name, parent_info->perm); - - if (sbi->options.derive == DERIVE_NONE) { - return; - } - /* Derive custom permissions based on parent and current node */ switch (parent_info->perm) { case PERM_INHERIT: /* Already inherited above */ break; - case PERM_LEGACY_PRE_ROOT: + case PERM_PRE_ROOT: /* Legacy internal layout places users at top level */ info->perm = PERM_ROOT; - info->userid = simple_strtoul(dentry->d_name.name, NULL, 10); + info->userid = simple_strtoul(newdentry->d_name.name, NULL, 10); break; case PERM_ROOT: /* Assume masked off by default. */ - info->d_mode = 00770; - if (!strcasecmp(dentry->d_name.name, "Android")) { + if (!strcasecmp(newdentry->d_name.name, "Android")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID; - info->d_mode = 00771; - } else if (sbi->options.split_perms) { - if (!strcasecmp(dentry->d_name.name, "DCIM") - || !strcasecmp(dentry->d_name.name, "Pictures")) { - info->d_gid = AID_SDCARD_PICS; - } else if (!strcasecmp(dentry->d_name.name, "Alarms") - || !strcasecmp(dentry->d_name.name, "Movies") - || !strcasecmp(dentry->d_name.name, "Music") - || !strcasecmp(dentry->d_name.name, "Notifications") - || !strcasecmp(dentry->d_name.name, "Podcasts") - || !strcasecmp(dentry->d_name.name, "Ringtones")) { - info->d_gid = AID_SDCARD_AV; - } + info->under_android = true; } break; case PERM_ANDROID: - if (!strcasecmp(dentry->d_name.name, "data")) { + if (!strcasecmp(newdentry->d_name.name, "data")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_DATA; - info->d_mode = 00771; - } else if (!strcasecmp(dentry->d_name.name, "obb")) { + } else if (!strcasecmp(newdentry->d_name.name, "obb")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_OBB; - info->d_mode = 00771; - // FIXME : this feature will be implemented later. /* Single OBB directory is always shared */ - } else if (!strcasecmp(dentry->d_name.name, "user")) { - /* User directories must only be accessible to system, protected - * by sdcard_all. Zygote will bind mount the appropriate user- - * specific path. */ - info->perm = PERM_ANDROID_USER; - info->d_gid = AID_SDCARD_ALL; - info->d_mode = 00770; + } else if (!strcasecmp(newdentry->d_name.name, "media")) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_MEDIA; } break; - /* same policy will be applied on PERM_ANDROID_DATA - * and PERM_ANDROID_OBB */ case PERM_ANDROID_DATA: case PERM_ANDROID_OBB: - appid = get_appid(sbi->pkgl_id, dentry->d_name.name); + case PERM_ANDROID_MEDIA: + appid = get_appid(sbi->pkgl_id, newdentry->d_name.name); if (appid != 0) { info->d_uid = multiuser_get_uid(parent_info->userid, appid); } - info->d_mode = 00770; - break; - case PERM_ANDROID_USER: - /* Root of a secondary user */ - info->perm = PERM_ROOT; - info->userid = simple_strtoul(dentry->d_name.name, NULL, 10); - info->d_gid = AID_SDCARD_R; - info->d_mode = 00771; break; } } +void get_derived_permission(struct dentry *parent, struct dentry *dentry) +{ + get_derived_permission_new(parent, dentry, dentry); +} + +void get_derive_permissions_recursive(struct dentry *parent) { + struct dentry *dentry; + list_for_each_entry(dentry, &parent->d_subdirs, d_child) { + if (dentry && dentry->d_inode) { + mutex_lock(&dentry->d_inode->i_mutex); + get_derived_permission(parent, dentry); + fix_derived_permission(dentry->d_inode); + get_derive_permissions_recursive(dentry); + mutex_unlock(&dentry->d_inode->i_mutex); + } + } +} + /* main function for updating derived permission */ -inline void update_derived_permission(struct dentry *dentry) +inline void update_derived_permission_lock(struct dentry *dentry) { struct dentry *parent; @@ -154,6 +135,7 @@ inline void update_derived_permission(struct dentry *dentry) * 1. need to check whether the dentry is updated or not * 2. remove the root dentry update */ + mutex_lock(&dentry->d_inode->i_mutex); if(IS_ROOT(dentry)) { //setup_default_pre_root_state(dentry->d_inode); } else { @@ -164,6 +146,7 @@ inline void update_derived_permission(struct dentry *dentry) } } fix_derived_permission(dentry->d_inode); + mutex_unlock(&dentry->d_inode->i_mutex); } int need_graft_path(struct dentry *dentry) @@ -177,7 +160,7 @@ int need_graft_path(struct dentry *dentry) !strcasecmp(dentry->d_name.name, "obb")) { /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ - if(!(sbi->options.derive == DERIVE_UNIFIED + if(!(sbi->options.multiuser == false && parent_info->userid == 0)) { ret = 1; } @@ -207,8 +190,7 @@ int is_obbpath_invalid(struct dentry *dent) path_buf = kmalloc(PATH_MAX, GFP_ATOMIC); if(!path_buf) { ret = 1; - printk(KERN_ERR "sdcardfs: " - "fail to allocate path_buf in %s.\n", __func__); + printk(KERN_ERR "sdcardfs: fail to allocate path_buf in %s.\n", __func__); } else { obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX); if (d_unhashed(di->lower_path.dentry) || @@ -234,21 +216,16 @@ int is_base_obbpath(struct dentry *dentry) struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); spin_lock(&SDCARDFS_D(dentry)->lock); - /* DERIVED_LEGACY */ - if(parent_info->perm == PERM_LEGACY_PRE_ROOT && - !strcasecmp(dentry->d_name.name, "obb")) { - ret = 1; - } - /* DERIVED_UNIFIED :/Android/obb is the base obbpath */ - else if (parent_info->perm == PERM_ANDROID && - !strcasecmp(dentry->d_name.name, "obb")) { - if((sbi->options.derive == DERIVE_UNIFIED - && parent_info->userid == 0)) { + if (sbi->options.multiuser) { + if(parent_info->perm == PERM_PRE_ROOT && + !strcasecmp(dentry->d_name.name, "obb")) { ret = 1; } + } else if (parent_info->perm == PERM_ANDROID && + !strcasecmp(dentry->d_name.name, "obb")) { + ret = 1; } spin_unlock(&SDCARDFS_D(dentry)->lock); - dput(parent); return ret; } @@ -272,8 +249,7 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) if(!err) { /* the obbpath base has been found */ - printk(KERN_INFO "sdcardfs: " - "the sbi->obbpath is found\n"); + printk(KERN_INFO "sdcardfs: the sbi->obbpath is found\n"); pathcpy(lower_path, &obbpath); } else { /* if the sbi->obbpath is not available, we can optionally @@ -281,8 +257,7 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) * but, the current implementation just returns an error * because the sdcard daemon also regards this case as * a lookup fail. */ - printk(KERN_INFO "sdcardfs: " - "the sbi->obbpath is not available\n"); + printk(KERN_INFO "sdcardfs: the sbi->obbpath is not available\n"); } return err; } diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index f9c5eaafc619..c249fa982d3c 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -209,7 +209,6 @@ static int sdcardfs_open(struct inode *inode, struct file *file) struct dentry *parent = dget_parent(dentry); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - int has_rw; /* don't open unhashed/deleted files */ if (d_unhashed(dentry)) { @@ -217,11 +216,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) goto out_err; } - has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, - sbi->options.derive, - open_flags_to_access_mode(file->f_flags), has_rw)) { + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -257,8 +252,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) if (err) kfree(SDCARDFS_F(file)); else { - fsstack_copy_attr_all(inode, sdcardfs_lower_inode(inode)); - fix_derived_permission(inode); + sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode)); } out_revert_cred: diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 75c622bac2f5..2528da0d3ae1 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -55,11 +55,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, struct dentry *lower_dentry; struct dentry *lower_parent_dentry = NULL; struct path lower_path; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -80,7 +78,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, if (err) goto out; - err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, SDCARDFS_I(dir)->userid); if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); @@ -143,11 +141,9 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) struct inode *lower_dir_inode = sdcardfs_lower_inode(dir); struct dentry *lower_dir_dentry; struct path lower_path; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -255,8 +251,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode int fullpath_namelen; int touch_err = 0; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -293,19 +288,19 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode if(err) { /* if the sbi->obbpath is not available, the lower_path won't be * changed by setup_obb_dentry() but the lower path is saved to - * its orig_path. this dentry will be revalidated later. + * its orig_path. this dentry will be revalidated later. * but now, the lower_path should be NULL */ sdcardfs_put_reset_lower_path(dentry); /* the newly created lower path which saved to its orig_path or * the lower_path is the base obbpath. - * therefore, an additional path_get is required */ + * therefore, an additional path_get is required */ path_get(&lower_path); } else make_nomedia_in_obb = 1; } - err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path); + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid); if (err) goto out; @@ -314,7 +309,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* update number of links on parent directory */ set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink); - if ((sbi->options.derive == DERIVE_UNIFIED) && (!strcasecmp(dentry->d_name.name, "obb")) + if ((!sbi->options.multiuser) && (!strcasecmp(dentry->d_name.name, "obb")) && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) make_nomedia_in_obb = 1; @@ -371,12 +366,9 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; int err; struct path lower_path; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - //char *path_s = NULL; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(dir, dentry->d_name.name, sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -461,14 +453,10 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *trap = NULL; struct dentry *new_parent = NULL; struct path lower_old_path, lower_new_path; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(old_dentry->d_sb); const struct cred *saved_cred = NULL; - int has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); - if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name, - sbi->options.derive, 1, has_rw) || - !check_caller_access_to_name(new_dir, new_dentry->d_name.name, - sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name) || + !check_caller_access_to_name(new_dir, new_dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " new_dentry: %s, task:%s\n", __func__, new_dentry->d_name.name, current->comm); @@ -505,26 +493,31 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; /* Copy attrs from lower dir, but i_uid/i_gid */ - fsstack_copy_attr_all(new_dir, d_inode(lower_new_dir_dentry)); + sdcardfs_copy_and_fix_attrs(new_dir, d_inode(lower_new_dir_dentry)); fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry)); - fix_derived_permission(new_dir); + if (new_dir != old_dir) { - fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry)); + sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry)); fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry)); - fix_derived_permission(old_dir); + /* update the derived permission of the old_dentry * with its new parent */ new_parent = dget_parent(new_dentry); if(new_parent) { if(d_inode(old_dentry)) { - get_derived_permission(new_parent, old_dentry); - fix_derived_permission(d_inode(old_dentry)); + update_derived_permission_lock(old_dentry); } dput(new_parent); } } - + /* At this point, not all dentry information has been moved, so + * we pass along new_dentry for the name.*/ + mutex_lock(&d_inode(old_dentry)->i_mutex); + get_derived_permission_new(new_dentry->d_parent, old_dentry, new_dentry); + fix_derived_permission(d_inode(old_dentry)); + get_derive_permissions_recursive(old_dentry); + mutex_unlock(&d_inode(old_dentry)->i_mutex); out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); @@ -639,9 +632,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) struct inode *lower_inode; struct path lower_path; struct iattr lower_ia; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct dentry *parent; - int has_rw; inode = d_inode(dentry); @@ -655,10 +646,8 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) /* no vfs_XXX operations required, cred overriding will be skipped. wj*/ if (!err) { /* check the Android group ID */ - has_rw = get_caller_has_rw_locked(sbi->pkgl_id, sbi->options.derive); parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name, - sbi->options.derive, 1, has_rw)) { + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -723,10 +712,8 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) if (err) goto out; - /* get attributes from the lower inode */ - fsstack_copy_attr_all(inode, lower_inode); - /* update derived permission of the upper inode */ - fix_derived_permission(inode); + /* get attributes from the lower inode and update derived permissions */ + sdcardfs_copy_and_fix_attrs(inode, lower_inode); /* * Not running fsstack_copy_inode_size(inode, lower_inode), because @@ -748,11 +735,9 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *lower_inode; struct path lower_path; struct dentry *parent; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name, - sbi->options.derive, 0, 0)) { + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); @@ -767,13 +752,10 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, lower_dentry = lower_path.dentry; lower_inode = sdcardfs_lower_inode(inode); - fsstack_copy_attr_all(inode, lower_inode); + + sdcardfs_copy_and_fix_attrs(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode); - /* if the dentry has been moved from other location - * so, on this stage, its derived permission must be - * rechecked from its private field. - */ - fix_derived_permission(inode); + generic_fillattr(inode, stat); sdcardfs_put_lower_path(dentry, &lower_path); diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index a4b94df99f32..f80abcb6b467 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -64,10 +64,17 @@ int new_dentry_private_data(struct dentry *dentry) return 0; } -static int sdcardfs_inode_test(struct inode *inode, void *candidate_lower_inode) +struct inode_data { + struct inode *lower_inode; + userid_t id; +}; + +static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void *candidate_lower_inode*/) { struct inode *current_lower_inode = sdcardfs_lower_inode(inode); - if (current_lower_inode == (struct inode *)candidate_lower_inode) + userid_t current_userid = SDCARDFS_I(inode)->userid; + if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode && + current_userid == ((struct inode_data *)candidate_data)->id) return 1; /* found a match */ else return 0; /* no match */ @@ -79,12 +86,15 @@ static int sdcardfs_inode_set(struct inode *inode, void *lower_inode) return 0; } -struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) +struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id) { struct sdcardfs_inode_info *info; + struct inode_data data; struct inode *inode; /* the new inode to return */ int err; + data.id = id; + data.lower_inode = lower_inode; inode = iget5_locked(sb, /* our superblock */ /* * hashval: we use inode number, but we can @@ -94,7 +104,7 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) lower_inode->i_ino, /* hashval */ sdcardfs_inode_test, /* inode comparison function */ sdcardfs_inode_set, /* inode init function */ - lower_inode); /* data passed to test+set fxns */ + &data); /* data passed to test+set fxns */ if (!inode) { err = -EACCES; iput(lower_inode); @@ -146,11 +156,9 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) lower_inode->i_rdev); /* all well, copy inode attributes */ - fsstack_copy_attr_all(inode, lower_inode); + sdcardfs_copy_and_fix_attrs(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode); - fix_derived_permission(inode); - unlock_new_inode(inode); return inode; } @@ -164,7 +172,7 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode) * @lower_path: the lower path (caller does path_get/put) */ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, - struct path *lower_path) + struct path *lower_path, userid_t id) { int err = 0; struct inode *inode; @@ -186,14 +194,14 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, */ /* inherit lower inode number for sdcardfs's inode */ - inode = sdcardfs_iget(sb, lower_inode); + inode = sdcardfs_iget(sb, lower_inode, id); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out; } d_add(dentry, inode); - update_derived_permission(dentry); + update_derived_permission_lock(dentry); out: return err; } @@ -205,7 +213,7 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, * Fills in lower_parent_path with on success. */ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, - unsigned int flags, struct path *lower_parent_path) + unsigned int flags, struct path *lower_parent_path, userid_t id) { int err = 0; struct vfsmount *lower_dir_mnt; @@ -266,7 +274,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, } sdcardfs_set_lower_path(dentry, &lower_path); - err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path); + err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id); if (err) /* path_put underlying path on error */ sdcardfs_put_reset_lower_path(dentry); goto out; @@ -328,13 +336,11 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *ret = NULL, *parent; struct path lower_parent_path; int err = 0; - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; parent = dget_parent(dentry); - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name, - sbi->options.derive, 0, 0)) { + if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { ret = ERR_PTR(-EACCES); printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", @@ -354,7 +360,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path); + ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid); if (IS_ERR(ret)) { goto out; @@ -365,8 +371,10 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, fsstack_copy_attr_times(dentry->d_inode, sdcardfs_lower_inode(dentry->d_inode)); /* get drived permission */ + mutex_lock(&dentry->d_inode->i_mutex); get_derived_permission(parent, dentry); fix_derived_permission(dentry->d_inode); + mutex_unlock(&dentry->d_inode->i_mutex); } /* update parent directory's atime */ fsstack_copy_attr_atime(parent->d_inode, diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 9d04ae8ceb46..80aa355d801e 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -24,25 +24,27 @@ #include enum { - Opt_uid, + Opt_fsuid, + Opt_fsgid, Opt_gid, - Opt_wgid, Opt_debug, - Opt_split, - Opt_derive, Opt_lower_fs, + Opt_mask, + Opt_multiuser, // May need? + Opt_userid, Opt_reserved_mb, Opt_err, }; static const match_table_t sdcardfs_tokens = { - {Opt_uid, "uid=%u"}, + {Opt_fsuid, "fsuid=%u"}, + {Opt_fsgid, "fsgid=%u"}, {Opt_gid, "gid=%u"}, - {Opt_wgid, "wgid=%u"}, {Opt_debug, "debug"}, - {Opt_split, "split"}, - {Opt_derive, "derive=%s"}, {Opt_lower_fs, "lower_fs=%s"}, + {Opt_mask, "mask=%u"}, + {Opt_userid, "userid=%d"}, + {Opt_multiuser, "multiuser"}, {Opt_reserved_mb, "reserved_mb=%u"}, {Opt_err, NULL} }; @@ -58,12 +60,10 @@ static int parse_options(struct super_block *sb, char *options, int silent, /* by default, we use AID_MEDIA_RW as uid, gid */ opts->fs_low_uid = AID_MEDIA_RW; opts->fs_low_gid = AID_MEDIA_RW; - /* by default, we use AID_SDCARD_RW as write_gid */ - opts->write_gid = AID_SDCARD_RW; - /* default permission policy - * (DERIVE_NONE | DERIVE_LEGACY | DERIVE_UNIFIED) */ - opts->derive = DERIVE_NONE; - opts->split_perms = 0; + opts->mask = 0; + opts->multiuser = false; + opts->fs_user_id = 0; + opts->gid = 0; /* by default, we use LOWER_FS_EXT4 as lower fs type */ opts->lower_fs = LOWER_FS_EXT4; /* by default, 0MB is reserved */ @@ -85,37 +85,33 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_debug: *debug = 1; break; - case Opt_uid: + case Opt_fsuid: if (match_int(&args[0], &option)) return 0; opts->fs_low_uid = option; break; - case Opt_gid: + case Opt_fsgid: if (match_int(&args[0], &option)) return 0; opts->fs_low_gid = option; break; - case Opt_wgid: + case Opt_gid: if (match_int(&args[0], &option)) return 0; - opts->write_gid = option; + opts->gid = option; break; - case Opt_split: - opts->split_perms=1; + case Opt_userid: + if (match_int(&args[0], &option)) + return 0; + opts->fs_user_id = option; break; - case Opt_derive: - string_option = match_strdup(&args[0]); - if (!strcmp("none", string_option)) { - opts->derive = DERIVE_NONE; - } else if (!strcmp("legacy", string_option)) { - opts->derive = DERIVE_LEGACY; - } else if (!strcmp("unified", string_option)) { - opts->derive = DERIVE_UNIFIED; - } else { - kfree(string_option); - goto invalid_option; - } - kfree(string_option); + case Opt_mask: + if (match_int(&args[0], &option)) + return 0; + opts->mask = option; + break; + case Opt_multiuser: + opts->multiuser = true; break; case Opt_lower_fs: string_option = match_strdup(&args[0]); @@ -184,6 +180,11 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) } #endif +DEFINE_MUTEX(sdcardfs_super_list_lock); +LIST_HEAD(sdcardfs_super_list); +EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock); +EXPORT_SYMBOL_GPL(sdcardfs_super_list); + /* * There is no need to lock the sdcardfs_super_info's rwsem as there is no * way anyone can have a reference to the superblock at this point in time. @@ -196,7 +197,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, struct super_block *lower_sb; struct path lower_path; struct sdcardfs_sb_info *sb_info; - void *pkgl_id; struct inode *inode; printk(KERN_INFO "sdcardfs version 2.0\n"); @@ -215,8 +215,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &lower_path); if (err) { - printk(KERN_ERR "sdcardfs: error accessing " - "lower directory '%s'\n", dev_name); + printk(KERN_ERR "sdcardfs: error accessing lower directory '%s'\n", dev_name); goto out; } @@ -229,7 +228,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, } sb_info = sb->s_fs_info; - /* parse options */ err = parse_options(sb, raw_data, silent, &debug, &sb_info->options); if (err) { @@ -237,14 +235,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, goto out_freesbi; } - if (sb_info->options.derive != DERIVE_NONE) { - pkgl_id = packagelist_create(sb_info->options.write_gid); - if(IS_ERR(pkgl_id)) - goto out_freesbi; - else - sb_info->pkgl_id = pkgl_id; - } - /* set the lower superblock field of upper superblock */ lower_sb = lower_path.dentry->d_sb; atomic_inc(&lower_sb->s_active); @@ -263,7 +253,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb->s_op = &sdcardfs_sops; /* get a new inode and allocate our root dentry */ - inode = sdcardfs_iget(sb, lower_path.dentry->d_inode); + inode = sdcardfs_iget(sb, lower_path.dentry->d_inode, 0); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_sput; @@ -292,41 +282,22 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, d_rehash(sb->s_root); /* setup permission policy */ - switch(sb_info->options.derive) { - case DERIVE_NONE: - setup_derived_state(sb->s_root->d_inode, - PERM_ROOT, 0, AID_ROOT, AID_SDCARD_RW, 00775); - sb_info->obbpath_s = NULL; - break; - case DERIVE_LEGACY: - /* Legacy behavior used to support internal multiuser layout which - * places user_id at the top directory level, with the actual roots - * just below that. Shared OBB path is also at top level. */ - setup_derived_state(sb->s_root->d_inode, - PERM_LEGACY_PRE_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); - /* initialize the obbpath string and lookup the path - * sb_info->obb_path will be deactivated by path_put - * on sdcardfs_put_super */ - sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); - snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); - err = prepare_dir(sb_info->obbpath_s, + sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); + mutex_lock(&sdcardfs_super_list_lock); + if(sb_info->options.multiuser) { + setup_derived_state(sb->s_root->d_inode, PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); + /*err = prepare_dir(sb_info->obbpath_s, sb_info->options.fs_low_uid, - sb_info->options.fs_low_gid, 00755); - if(err) - printk(KERN_ERR "sdcardfs: %s: %d, error on creating %s\n", - __func__,__LINE__, sb_info->obbpath_s); - break; - case DERIVE_UNIFIED: - /* Unified multiuser layout which places secondary user_id under - * /Android/user and shared OBB path under /Android/obb. */ - setup_derived_state(sb->s_root->d_inode, - PERM_ROOT, 0, AID_ROOT, AID_SDCARD_R, 00771); - - sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); - snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); - break; + sb_info->options.fs_low_gid, 00755);*/ + } else { + setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false); + snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fix_derived_permission(sb->s_root->d_inode); + sb_info->sb = sb; + list_add(&sb_info->list, &sdcardfs_super_list); + mutex_unlock(&sdcardfs_super_list_lock); if (!silent) printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", @@ -341,7 +312,6 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, out_sput: /* drop refs we took earlier */ atomic_dec(&lower_sb->s_active); - packagelist_destroy(sb_info->pkgl_id); out_freesbi: kfree(SDCARDFS_SB(sb)); sb->s_fs_info = NULL; @@ -386,11 +356,22 @@ struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags, raw_data, sdcardfs_read_super); } +void sdcardfs_kill_sb(struct super_block *sb) { + struct sdcardfs_sb_info *sbi; + if (sb->s_magic == SDCARDFS_SUPER_MAGIC) { + sbi = SDCARDFS_SB(sb); + mutex_lock(&sdcardfs_super_list_lock); + list_del(&sbi->list); + mutex_unlock(&sdcardfs_super_list_lock); + } + generic_shutdown_super(sb); +} + static struct file_system_type sdcardfs_fs_type = { .owner = THIS_MODULE, .name = SDCARDFS_NAME, .mount = sdcardfs_mount, - .kill_sb = generic_shutdown_super, + .kill_sb = sdcardfs_kill_sb, .fs_flags = 0, }; diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index f11591da141d..ba3478d94107 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -19,13 +19,16 @@ */ #include "sdcardfs.h" -#include "strtok.h" #include -#include -#include -#include #include + +#include +#include +#include + +#include + #define STRING_BUF_SIZE (512) struct hashtable_entry { @@ -34,25 +37,20 @@ struct hashtable_entry { unsigned int value; }; +struct sb_list { + struct super_block *sb; + struct list_head list; +}; + struct packagelist_data { DECLARE_HASHTABLE(package_to_appid,8); - DECLARE_HASHTABLE(appid_with_rw,7); struct mutex hashtable_lock; - struct task_struct *thread_id; - gid_t write_gid; - char *strtok_last; - char read_buf[STRING_BUF_SIZE]; - char event_buf[STRING_BUF_SIZE]; - char app_name_buf[STRING_BUF_SIZE]; - char gids_buf[STRING_BUF_SIZE]; + }; -static struct kmem_cache *hashtable_entry_cachep; +static struct packagelist_data *pkgl_data_all; -/* Path to system-provided mapping of package name to appIds */ -static const char* const kpackageslist_file = "/data/system/packages.list"; -/* Supplementary groups to execute with */ -static const gid_t kgroups[1] = { AID_PACKAGE_INFO }; +static struct kmem_cache *hashtable_entry_cachep; static unsigned int str_hash(const char *key) { int i; @@ -66,62 +64,29 @@ static unsigned int str_hash(const char *key) { return h; } -static int contain_appid_key(struct packagelist_data *pkgl_dat, unsigned int appid) { - struct hashtable_entry *hash_cur; - - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, appid) - if ((void *)(uintptr_t)appid == hash_cur->key) - return 1; - - return 0; -} - -/* Return if the calling UID holds sdcard_rw. */ -int get_caller_has_rw_locked(void *pkgl_id, derive_t derive) { - struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; - appid_t appid; - int ret; - - /* No additional permissions enforcement */ - if (derive == DERIVE_NONE) { - return 1; - } - - appid = multiuser_get_app_id(from_kuid(&init_user_ns, current_fsuid())); - mutex_lock(&pkgl_dat->hashtable_lock); - ret = contain_appid_key(pkgl_dat, appid); - mutex_unlock(&pkgl_dat->hashtable_lock); - return ret; -} - appid_t get_appid(void *pkgl_id, const char *app_name) { - struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; + struct packagelist_data *pkgl_dat = pkgl_data_all; struct hashtable_entry *hash_cur; unsigned int hash = str_hash(app_name); appid_t ret_id; - //printk(KERN_INFO "sdcardfs: %s: %s, %u\n", __func__, (char *)app_name, hash); mutex_lock(&pkgl_dat->hashtable_lock); hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { - //printk(KERN_INFO "sdcardfs: %s: %s\n", __func__, (char *)hash_cur->key); if (!strcasecmp(app_name, hash_cur->key)) { ret_id = (appid_t)hash_cur->value; mutex_unlock(&pkgl_dat->hashtable_lock); - //printk(KERN_INFO "=> app_id: %d\n", (int)ret_id); return ret_id; } } mutex_unlock(&pkgl_dat->hashtable_lock); - //printk(KERN_INFO "=> app_id: %d\n", 0); return 0; } /* Kernel has already enforced everything we returned through * derive_permissions_locked(), so this is used to lock down access * even further, such as enforcing that apps hold sdcard_rw. */ -int check_caller_access_to_name(struct inode *parent_node, const char* name, - derive_t derive, int w_ok, int has_rw) { +int check_caller_access_to_name(struct inode *parent_node, const char* name) { /* Always block security-sensitive files at root */ if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) { @@ -132,28 +97,12 @@ int check_caller_access_to_name(struct inode *parent_node, const char* name, } } - /* No additional permissions enforcement */ - if (derive == DERIVE_NONE) { - return 1; - } - /* Root always has access; access for any other UIDs should always * be controlled through packages.list. */ if (from_kuid(&init_user_ns, current_fsuid()) == 0) { return 1; } - /* If asking to write, verify that caller either owns the - * parent or holds sdcard_rw. */ - if (w_ok) { - if (parent_node && - (from_kuid(&init_user_ns, current_fsuid()) == - SDCARDFS_I(parent_node)->d_uid)) { - return 1; - } - return has_rw; - } - /* No extra permissions to enforce */ return 1; } @@ -171,14 +120,13 @@ int open_flags_to_access_mode(int open_flags) { } } -static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, +static int insert_str_to_int_lock(struct packagelist_data *pkgl_dat, char *key, unsigned int value) { struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; unsigned int hash = str_hash(key); - //printk(KERN_INFO "sdcardfs: %s: %s: %d, %u\n", __func__, (char *)key, value, hash); hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(key, hash_cur->key)) { hash_cur->value = value; @@ -194,245 +142,275 @@ static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, return 0; } -static void remove_str_to_int(struct hashtable_entry *h_entry) { - //printk(KERN_INFO "sdcardfs: %s: %s: %d\n", __func__, (char *)h_entry->key, h_entry->value); - kfree(h_entry->key); - kmem_cache_free(hashtable_entry_cachep, h_entry); +static void fixup_perms(struct super_block *sb) { + if (sb && sb->s_magic == SDCARDFS_SUPER_MAGIC) { + mutex_lock(&sb->s_root->d_inode->i_mutex); + get_derive_permissions_recursive(sb->s_root); + mutex_unlock(&sb->s_root->d_inode->i_mutex); + } } -static int insert_int_to_null(struct packagelist_data *pkgl_dat, unsigned int key, - unsigned int value) -{ - struct hashtable_entry *hash_cur; - struct hashtable_entry *new_entry; +static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, + unsigned int value) { + int ret; + struct sdcardfs_sb_info *sbinfo; + mutex_lock(&sdcardfs_super_list_lock); + mutex_lock(&pkgl_dat->hashtable_lock); + ret = insert_str_to_int_lock(pkgl_dat, key, value); + mutex_unlock(&pkgl_dat->hashtable_lock); - //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)key, value); - hash_for_each_possible(pkgl_dat->appid_with_rw, hash_cur, hlist, key) { - if ((void *)(uintptr_t)key == hash_cur->key) { - hash_cur->value = value; - return 0; + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { + if (sbinfo) { + fixup_perms(sbinfo->sb); } } - new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); - if (!new_entry) - return -ENOMEM; - new_entry->key = (void *)(uintptr_t)key; - new_entry->value = value; - hash_add(pkgl_dat->appid_with_rw, &new_entry->hlist, key); - return 0; + mutex_unlock(&sdcardfs_super_list_lock); + return ret; } -static void remove_int_to_null(struct hashtable_entry *h_entry) { - //printk(KERN_INFO "sdcardfs: %s: %d: %d\n", __func__, (int)h_entry->key, h_entry->value); +static void remove_str_to_int_lock(struct hashtable_entry *h_entry) { + kfree(h_entry->key); + hash_del(&h_entry->hlist); kmem_cache_free(hashtable_entry_cachep, h_entry); } +static void remove_str_to_int(struct packagelist_data *pkgl_dat, const char *key) +{ + struct sdcardfs_sb_info *sbinfo; + struct hashtable_entry *hash_cur; + unsigned int hash = str_hash(key); + mutex_lock(&sdcardfs_super_list_lock); + mutex_lock(&pkgl_dat->hashtable_lock); + hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { + if (!strcasecmp(key, hash_cur->key)) { + remove_str_to_int_lock(hash_cur); + break; + } + } + mutex_unlock(&pkgl_dat->hashtable_lock); + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { + if (sbinfo) { + fixup_perms(sbinfo->sb); + } + } + mutex_unlock(&sdcardfs_super_list_lock); + return; +} + static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) { struct hashtable_entry *hash_cur; struct hlist_node *h_t; int i; - + mutex_lock(&pkgl_dat->hashtable_lock); hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist) - remove_str_to_int(hash_cur); - hash_for_each_safe(pkgl_dat->appid_with_rw, i, h_t, hash_cur, hlist) - remove_int_to_null(hash_cur); - + remove_str_to_int_lock(hash_cur); + mutex_unlock(&pkgl_dat->hashtable_lock); hash_init(pkgl_dat->package_to_appid); - hash_init(pkgl_dat->appid_with_rw); } -static int read_package_list(struct packagelist_data *pkgl_dat) { - int ret; - int fd; - int read_amount; +static struct packagelist_data * packagelist_create(void) +{ + struct packagelist_data *pkgl_dat; - printk(KERN_INFO "sdcardfs: read_package_list\n"); + pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO); + if (!pkgl_dat) { + printk(KERN_ERR "sdcardfs: Failed to create hash\n"); + return ERR_PTR(-ENOMEM); + } - mutex_lock(&pkgl_dat->hashtable_lock); + mutex_init(&pkgl_dat->hashtable_lock); + hash_init(pkgl_dat->package_to_appid); + + return pkgl_dat; +} +static void packagelist_destroy(struct packagelist_data *pkgl_dat) +{ remove_all_hashentrys(pkgl_dat); + printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld\n"); + kfree(pkgl_dat); +} - fd = sys_open(kpackageslist_file, O_RDONLY, 0); - if (fd < 0) { - printk(KERN_ERR "sdcardfs: failed to open package list\n"); - mutex_unlock(&pkgl_dat->hashtable_lock); - return fd; - } +struct package_appid { + struct config_item item; + int add_pid; +}; - while ((read_amount = sys_read(fd, pkgl_dat->read_buf, - sizeof(pkgl_dat->read_buf))) > 0) { - unsigned int appid; - char *token; - int one_line_len = 0; - int additional_read; - unsigned long ret_gid; - - while (one_line_len < read_amount) { - if (pkgl_dat->read_buf[one_line_len] == '\n') { - one_line_len++; - break; - } - one_line_len++; - } - additional_read = read_amount - one_line_len; - if (additional_read > 0) - sys_lseek(fd, -additional_read, SEEK_CUR); - - if (sscanf(pkgl_dat->read_buf, "%s %u %*d %*s %*s %s", - pkgl_dat->app_name_buf, &appid, - pkgl_dat->gids_buf) == 3) { - ret = insert_str_to_int(pkgl_dat, pkgl_dat->app_name_buf, appid); - if (ret) { - sys_close(fd); - mutex_unlock(&pkgl_dat->hashtable_lock); - return ret; - } - - token = strtok_r(pkgl_dat->gids_buf, ",", &pkgl_dat->strtok_last); - while (token != NULL) { - if (!kstrtoul(token, 10, &ret_gid) && - (ret_gid == pkgl_dat->write_gid)) { - ret = insert_int_to_null(pkgl_dat, appid, 1); - if (ret) { - sys_close(fd); - mutex_unlock(&pkgl_dat->hashtable_lock); - return ret; - } - break; - } - token = strtok_r(NULL, ",", &pkgl_dat->strtok_last); - } - } - } +static inline struct package_appid *to_package_appid(struct config_item *item) +{ + return item ? container_of(item, struct package_appid, item) : NULL; +} - sys_close(fd); - mutex_unlock(&pkgl_dat->hashtable_lock); - return 0; +static ssize_t package_appid_attr_show(struct config_item *item, + char *page) +{ + ssize_t count; + count = sprintf(page, "%d\n", get_appid(pkgl_data_all, item->ci_name)); + return count; } -static int packagelist_reader(void *thread_data) +static ssize_t package_appid_attr_store(struct config_item *item, + const char *page, size_t count) { - struct packagelist_data *pkgl_dat = (struct packagelist_data *)thread_data; - struct inotify_event *event; - bool active = false; - int event_pos; - int event_size; - int res = 0; - int nfd; - - allow_signal(SIGINT); - - nfd = sys_inotify_init(); - if (nfd < 0) { - printk(KERN_ERR "sdcardfs: inotify_init failed: %d\n", nfd); - return nfd; - } + struct package_appid *package_appid = to_package_appid(item); + unsigned long tmp; + char *p = (char *) page; + int ret; - while (!kthread_should_stop()) { - if (signal_pending(current)) { - ssleep(1); - continue; - } + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; - if (!active) { - res = sys_inotify_add_watch(nfd, kpackageslist_file, IN_DELETE_SELF); - if (res < 0) { - if (res == -ENOENT || res == -EACCES) { - /* Framework may not have created yet, sleep and retry */ - printk(KERN_ERR "sdcardfs: missing packages.list; retrying\n"); - ssleep(2); - printk(KERN_ERR "sdcardfs: missing packages.list_end; retrying\n"); - continue; - } else { - printk(KERN_ERR "sdcardfs: inotify_add_watch failed: %d\n", res); - goto interruptable_sleep; - } - } - /* Watch above will tell us about any future changes, so - * read the current state. */ - res = read_package_list(pkgl_dat); - if (res) { - printk(KERN_ERR "sdcardfs: read_package_list failed: %d\n", res); - goto interruptable_sleep; - } - active = true; - } + if (tmp > INT_MAX) + return -ERANGE; + ret = insert_str_to_int(pkgl_data_all, item->ci_name, (unsigned int)tmp); + package_appid->add_pid = tmp; + if (ret) + return ret; - event_pos = 0; - res = sys_read(nfd, pkgl_dat->event_buf, sizeof(pkgl_dat->event_buf)); - if (res < (int) sizeof(*event)) { - if (res == -EINTR) - continue; - printk(KERN_ERR "sdcardfs: failed to read inotify event: %d\n", res); - goto interruptable_sleep; - } + return count; +} - while (res >= (int) sizeof(*event)) { - event = (struct inotify_event *) (pkgl_dat->event_buf + event_pos); +static struct configfs_attribute package_appid_attr_add_pid = { + .ca_owner = THIS_MODULE, + .ca_name = "appid", + .ca_mode = S_IRUGO | S_IWUGO, + .show = package_appid_attr_show, + .store = package_appid_attr_store, +}; - printk(KERN_INFO "sdcardfs: inotify event: %08x\n", event->mask); - if ((event->mask & IN_IGNORED) == IN_IGNORED) { - /* Previously watched file was deleted, probably due to move - * that swapped in new data; re-arm the watch and read. */ - active = false; - } +static struct configfs_attribute *package_appid_attrs[] = { + &package_appid_attr_add_pid, + NULL, +}; - event_size = sizeof(*event) + event->len; - res -= event_size; - event_pos += event_size; - } - continue; +static void package_appid_release(struct config_item *item) +{ + printk(KERN_INFO "sdcardfs: removing %s\n", item->ci_dentry->d_name.name); + /* item->ci_name is freed already, so we rely on the dentry */ + remove_str_to_int(pkgl_data_all, item->ci_dentry->d_name.name); + kfree(to_package_appid(item)); +} -interruptable_sleep: - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } - flush_signals(current); - sys_close(nfd); - return res; +static struct configfs_item_operations package_appid_item_ops = { + .release = package_appid_release, +}; + +static struct config_item_type package_appid_type = { + .ct_item_ops = &package_appid_item_ops, + .ct_attrs = package_appid_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct sdcardfs_packages { + struct config_group group; +}; + +static inline struct sdcardfs_packages *to_sdcardfs_packages(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct sdcardfs_packages, group) : NULL; } -void * packagelist_create(gid_t write_gid) +static struct config_item *sdcardfs_packages_make_item(struct config_group *group, const char *name) { - struct packagelist_data *pkgl_dat; - struct task_struct *packagelist_thread; + struct package_appid *package_appid; - pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO); - if (!pkgl_dat) { - printk(KERN_ERR "sdcardfs: creating kthread failed\n"); + package_appid = kzalloc(sizeof(struct package_appid), GFP_KERNEL); + if (!package_appid) return ERR_PTR(-ENOMEM); - } - mutex_init(&pkgl_dat->hashtable_lock); - hash_init(pkgl_dat->package_to_appid); - hash_init(pkgl_dat->appid_with_rw); - pkgl_dat->write_gid = write_gid; + config_item_init_type_name(&package_appid->item, name, + &package_appid_type); + + package_appid->add_pid = 0; - packagelist_thread = kthread_run(packagelist_reader, (void *)pkgl_dat, "pkgld"); - if (IS_ERR(packagelist_thread)) { - printk(KERN_ERR "sdcardfs: creating kthread failed\n"); - kfree(pkgl_dat); - return packagelist_thread; - } - pkgl_dat->thread_id = packagelist_thread; + return &package_appid->item; +} - printk(KERN_INFO "sdcardfs: created packagelist pkgld/%d\n", - (int)pkgl_dat->thread_id->pid); +static ssize_t packages_attr_show(struct config_item *item, + char *page) +{ + struct hashtable_entry *hash_cur; + struct hlist_node *h_t; + int i; + int count = 0; + mutex_lock(&pkgl_data_all->hashtable_lock); + hash_for_each_safe(pkgl_data_all->package_to_appid, i, h_t, hash_cur, hlist) + count += snprintf(page + count, PAGE_SIZE - count, "%s %d\n", (char *)hash_cur->key, hash_cur->value); + mutex_unlock(&pkgl_data_all->hashtable_lock); - return (void *)pkgl_dat; + + return count; } -void packagelist_destroy(void *pkgl_id) +static struct configfs_attribute sdcardfs_packages_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "packages_gid.list", + .ca_mode = S_IRUGO, + .show = packages_attr_show, +}; + +static struct configfs_attribute *sdcardfs_packages_attrs[] = { + &sdcardfs_packages_attr_description, + NULL, +}; + +static void sdcardfs_packages_release(struct config_item *item) { - struct packagelist_data *pkgl_dat = (struct packagelist_data *)pkgl_id; - pid_t pkgl_pid = pkgl_dat->thread_id->pid; - force_sig_info(SIGINT, SEND_SIG_PRIV, pkgl_dat->thread_id); - kthread_stop(pkgl_dat->thread_id); - remove_all_hashentrys(pkgl_dat); - printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld/%d\n", (int)pkgl_pid); - kfree(pkgl_dat); + printk(KERN_INFO "sdcardfs: destroyed something?\n"); + kfree(to_sdcardfs_packages(item)); +} + +static struct configfs_item_operations sdcardfs_packages_item_ops = { + .release = sdcardfs_packages_release, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations sdcardfs_packages_group_ops = { + .make_item = sdcardfs_packages_make_item, +}; + +static struct config_item_type sdcardfs_packages_type = { + .ct_item_ops = &sdcardfs_packages_item_ops, + .ct_group_ops = &sdcardfs_packages_group_ops, + .ct_attrs = sdcardfs_packages_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem sdcardfs_packages_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "sdcardfs", + .ci_type = &sdcardfs_packages_type, + }, + }, +}; + +static int __init configfs_sdcardfs_init(void) +{ + int ret; + struct configfs_subsystem *subsys = &sdcardfs_packages_subsys; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + } + return ret; +} + +static void __exit configfs_sdcardfs_exit(void) +{ + configfs_unregister_subsystem(&sdcardfs_packages_subsys); } int packagelist_init(void) @@ -445,13 +423,15 @@ int packagelist_init(void) return -ENOMEM; } + pkgl_data_all = packagelist_create(); + configfs_sdcardfs_init(); return 0; } void packagelist_exit(void) { + configfs_sdcardfs_exit(); + packagelist_destroy(pkgl_data_all); if (hashtable_entry_cachep) kmem_cache_destroy(hashtable_entry_cachep); } - - diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 51f6c7912584..1b85f4e70324 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -42,6 +42,7 @@ #include #include #include +#include #include "multiuser.h" /* the file system name */ @@ -70,10 +71,11 @@ #define fix_derived_permission(x) \ do { \ (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \ - (x)->i_gid = make_kgid(&init_user_ns, SDCARDFS_I(x)->d_gid); \ - (x)->i_mode = ((x)->i_mode & S_IFMT) | SDCARDFS_I(x)->d_mode;\ + (x)->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(x))); \ + (x)->i_mode = ((x)->i_mode & S_IFMT) | get_mode(SDCARDFS_I(x));\ } while (0) + /* OVERRIDE_CRED() and REVERT_CRED() * OVERRID_CRED() * backup original task->cred @@ -99,35 +101,28 @@ (int)current->cred->fsuid, \ (int)current->cred->fsgid); -/* Android 4.4 support */ +/* Android 5.0 support */ /* Permission mode for a specific node. Controls how file permissions * are derived for children nodes. */ typedef enum { - /* Nothing special; this node should just inherit from its parent. */ - PERM_INHERIT, - /* This node is one level above a normal root; used for legacy layouts - * which use the first level to represent user_id. */ - PERM_LEGACY_PRE_ROOT, - /* This node is "/" */ - PERM_ROOT, - /* This node is "/Android" */ - PERM_ANDROID, - /* This node is "/Android/data" */ - PERM_ANDROID_DATA, - /* This node is "/Android/obb" */ - PERM_ANDROID_OBB, - /* This node is "/Android/user" */ - PERM_ANDROID_USER, + /* Nothing special; this node should just inherit from its parent. */ + PERM_INHERIT, + /* This node is one level above a normal root; used for legacy layouts + * which use the first level to represent user_id. */ + PERM_PRE_ROOT, + /* This node is "/" */ + PERM_ROOT, + /* This node is "/Android" */ + PERM_ANDROID, + /* This node is "/Android/data" */ + PERM_ANDROID_DATA, + /* This node is "/Android/obb" */ + PERM_ANDROID_OBB, + /* This node is "/Android/media" */ + PERM_ANDROID_MEDIA, } perm_t; -/* Permissions structure to derive */ -typedef enum { - DERIVE_NONE, - DERIVE_LEGACY, - DERIVE_UNIFIED, -} derive_t; - typedef enum { LOWER_FS_EXT4, LOWER_FS_FAT, @@ -161,9 +156,9 @@ extern void free_dentry_private_data(struct dentry *dentry); extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern struct inode *sdcardfs_iget(struct super_block *sb, - struct inode *lower_inode); + struct inode *lower_inode, userid_t id); extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, - struct path *lower_path); + struct path *lower_path, userid_t id); /* file private data */ struct sdcardfs_file_info { @@ -174,18 +169,16 @@ struct sdcardfs_file_info { /* sdcardfs inode data in memory */ struct sdcardfs_inode_info { struct inode *lower_inode; - /* state derived based on current position in hierachy - * caution: d_mode does not include file types - */ + /* state derived based on current position in hierachy */ perm_t perm; userid_t userid; uid_t d_uid; - gid_t d_gid; - mode_t d_mode; + bool under_android; struct inode vfs_inode; }; + /* sdcardfs dentry data in memory */ struct sdcardfs_dentry_info { spinlock_t lock; /* protects lower_path */ @@ -196,15 +189,17 @@ struct sdcardfs_dentry_info { struct sdcardfs_mount_options { uid_t fs_low_uid; gid_t fs_low_gid; - gid_t write_gid; - int split_perms; - derive_t derive; + userid_t fs_user_id; + gid_t gid; lower_fs_t lower_fs; + mode_t mask; + bool multiuser; unsigned int reserved_mb; }; /* sdcardfs super-block data in memory */ struct sdcardfs_sb_info { + struct super_block *sb; struct super_block *lower_sb; /* derived perm policy : some of options have been added * to sdcardfs_mount_options (Android 4.4 support) */ @@ -213,6 +208,7 @@ struct sdcardfs_sb_info { char *obbpath_s; struct path obbpath; void *pkgl_id; + struct list_head list; }; /* @@ -331,6 +327,44 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ SDCARDFS_DENT_FUNC(lower_path) SDCARDFS_DENT_FUNC(orig_path) +static inline int get_gid(struct sdcardfs_inode_info *info) { + struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); + if (sb_info->options.gid == AID_SDCARD_RW) { + /* As an optimization, certain trusted system components only run + * as owner but operate across all users. Since we're now handing + * out the sdcard_rw GID only to trusted apps, we're okay relaxing + * the user boundary enforcement for the default view. The UIDs + * assigned to app directories are still multiuser aware. */ + return AID_SDCARD_RW; + } else { + return multiuser_get_uid(info->userid, sb_info->options.gid); + } +} +static inline int get_mode(struct sdcardfs_inode_info *info) { + int owner_mode; + int filtered_mode; + struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); + int visible_mode = 0775 & ~sb_info->options.mask; + + if (info->perm == PERM_PRE_ROOT) { + /* Top of multi-user view should always be visible to ensure + * secondary users can traverse inside. */ + visible_mode = 0711; + } else if (info->under_android) { + /* Block "other" access to Android directories, since only apps + * belonging to a specific user should be in there; we still + * leave +x open for the default view. */ + if (sb_info->options.gid == AID_SDCARD_RW) { + visible_mode = visible_mode & ~0006; + } else { + visible_mode = visible_mode & ~0007; + } + } + owner_mode = info->lower_inode->i_mode & 0700; + filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6)); + return filtered_mode; +} + static inline int has_graft_path(const struct dentry *dent) { int ret = 0; @@ -364,22 +398,24 @@ static inline void sdcardfs_put_real_lower(const struct dentry *dent, sdcardfs_put_lower_path(dent, real_lower); } +extern struct mutex sdcardfs_super_list_lock; +extern struct list_head sdcardfs_super_list; + /* for packagelist.c */ -extern int get_caller_has_rw_locked(void *pkgl_id, derive_t derive); extern appid_t get_appid(void *pkgl_id, const char *app_name); -extern int check_caller_access_to_name(struct inode *parent_node, const char* name, - derive_t derive, int w_ok, int has_rw); +extern int check_caller_access_to_name(struct inode *parent_node, const char* name); extern int open_flags_to_access_mode(int open_flags); -extern void * packagelist_create(gid_t write_gid); -extern void packagelist_destroy(void *pkgl_id); extern int packagelist_init(void); extern void packagelist_exit(void); /* for derived_perm.c */ extern void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, gid_t gid, mode_t mode); + userid_t userid, uid_t uid, bool under_android); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); -extern void update_derived_permission(struct dentry *dentry); +extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry); +extern void get_derive_permissions_recursive(struct dentry *parent); + +extern void update_derived_permission_lock(struct dentry *dentry); extern int need_graft_path(struct dentry *dentry); extern int is_base_obbpath(struct dentry *dentry); extern int is_obbpath_invalid(struct dentry *dentry); @@ -483,4 +519,18 @@ static inline int check_min_free_space(struct dentry *dentry, size_t size, int d return 1; } +/* Copies attrs and maintains sdcardfs managed attrs */ +static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src) +{ + dest->i_mode = (src->i_mode & S_IFMT) | get_mode(SDCARDFS_I(dest)); + dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid); + dest->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(dest))); + dest->i_rdev = src->i_rdev; + dest->i_atime = src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; + dest->i_blkbits = src->i_blkbits; + dest->i_flags = src->i_flags; + set_nlink(dest, src->i_nlink); +} #endif /* not _SDCARDFS_H_ */ diff --git a/fs/sdcardfs/strtok.h b/fs/sdcardfs/strtok.h deleted file mode 100644 index 50ab25aa0bc4..000000000000 --- a/fs/sdcardfs/strtok.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * fs/sdcardfs/strtok.h - * - * Copyright (c) 2013 Samsung Electronics Co. Ltd - * Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun, - * Sunghwan Yun, Sungjong Seo - * - * This program has been developed as a stackable file system based on - * the WrapFS which written by - * - * Copyright (c) 1998-2011 Erez Zadok - * Copyright (c) 2009 Shrikar Archak - * Copyright (c) 2003-2011 Stony Brook University - * Copyright (c) 2003-2011 The Research Foundation of SUNY - * - * This file is dual licensed. It may be redistributed and/or modified - * under the terms of the Apache 2.0 License OR version 2 of the GNU - * General Public License. - */ - -static char * -strtok_r(char *s, const char *delim, char **last) -{ - char *spanp; - int c, sc; - char *tok; - - - /* if (s == NULL && (s = *last) == NULL) - return NULL; */ - if (s == NULL) { - s = *last; - if (s == NULL) - return NULL; - } - - /* - * Skip (span) leading delimiters (s += strspn(s, delim), sort of). - */ -cont: - c = *s++; - for (spanp = (char *)delim; (sc = *spanp++) != 0;) { - if (c == sc) - goto cont; - } - - if (c == 0) { /* no non-delimiter characters */ - *last = NULL; - return NULL; - } - tok = s - 1; - - /* - * Scan token (scan for delimiters: s += strcspn(s, delim), sort of). - * Note that delim must have one NUL; we stop if we see that, too. - */ - for (;;) { - c = *s++; - spanp = (char *)delim; - do { - sc = *spanp++; - if (sc == c) { - if (c == 0) - s = NULL; - else - s[-1] = 0; - *last = s; - return tok; - } - } while (sc != 0); - } - - /* NOTREACHED */ -} - diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index f153ce1b8cf3..1d6490128c99 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -46,9 +46,6 @@ static void sdcardfs_put_super(struct super_block *sb) sdcardfs_set_lower_super(sb, NULL); atomic_dec(&s->s_active); - if(spd->pkgl_id) - packagelist_destroy(spd->pkgl_id); - kfree(spd); sb->s_fs_info = NULL; } @@ -203,12 +200,8 @@ static int sdcardfs_show_options(struct seq_file *m, struct dentry *root) if (opts->fs_low_gid != 0) seq_printf(m, ",gid=%u", opts->fs_low_gid); - if (opts->derive == DERIVE_NONE) - seq_printf(m, ",derive=none"); - else if (opts->derive == DERIVE_LEGACY) - seq_printf(m, ",derive=legacy"); - else if (opts->derive == DERIVE_UNIFIED) - seq_printf(m, ",derive=unified"); + if (opts->multiuser) + seq_printf(m, ",multiuser"); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); -- GitLab From dab6f5031d0470427d90eab3834cdabf354458f2 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 11 Feb 2016 16:44:15 -0800 Subject: [PATCH 0989/1442] ANDROID: vfs: add d_canonical_path for stacked filesystem support Inotify does not currently know when a filesystem is acting as a wrapper around another fs. This means that inotify watchers will miss any modifications to the base file, as well as any made in a separate stacked fs that points to the same file. d_canonical_path solves this problem by allowing the fs to map a dentry to a path in the lower fs. Inotify can use it to find the appropriate place to watch to be informed of all changes to a file. Change-Id: I09563baffad1711a045e45c1bd0bd8713c2cc0b6 Signed-off-by: Daniel Rosenberg --- fs/notify/inotify/inotify_user.c | 17 ++++++++++++++--- include/linux/dcache.h | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 69d1ea3d292a..6413928357be 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -702,6 +702,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; + struct path alteredpath; + struct path *canonical_path = &path; struct fd f; int ret; unsigned flags = 0; @@ -741,13 +743,22 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (ret) goto fput_and_out; + /* support stacked filesystems */ + if(path.dentry && path.dentry->d_op) { + if (path.dentry->d_op->d_canonical_path) { + path.dentry->d_op->d_canonical_path(path.dentry, &alteredpath); + canonical_path = &alteredpath; + path_put(&path); + } + } + /* inode held in place by reference to path; group by fget on fd */ - inode = path.dentry->d_inode; + inode = canonical_path->dentry->d_inode; group = f.file->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); - path_put(&path); + path_put(canonical_path); fput_and_out: fdput(f); return ret; @@ -814,7 +825,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); - BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); + BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5beed7b30561..8f369acc1a94 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -142,6 +142,7 @@ struct dentry_operations { int (*d_manage)(struct dentry *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *, unsigned int); + void (*d_canonical_path)(const struct dentry *, struct path *); } ____cacheline_aligned; /* -- GitLab From a782a7946001d93a9d96523c9fc8bbcd32ef9d83 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 11 Feb 2016 16:53:36 -0800 Subject: [PATCH 0990/1442] ANDROID: sdcardfs: Add support for d_canonical_path Change-Id: I5d6f0e71b8ca99aec4b0894412f1dfd1cfe12add Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/dentry.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index dbbcfd091fc7..ba165ef11e27 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -177,5 +177,6 @@ const struct dentry_operations sdcardfs_ci_dops = { .d_release = sdcardfs_d_release, .d_hash = sdcardfs_hash_ci, .d_compare = sdcardfs_cmp_ci, + .d_canonical_path = sdcardfs_get_real_lower, }; -- GitLab From d335459377f719c402ac199937a72a2d4632f8d4 Mon Sep 17 00:00:00 2001 From: Thierry Strudel Date: Wed, 23 Mar 2016 10:02:15 -0700 Subject: [PATCH 0991/1442] ANDROID: trace: cpufreq: fix typo in min/max cpufreq Change-Id: Ieed402d3a912b7a318826e101efe2c24b07ebfe4 Signed-off-by: Thierry Strudel --- include/trace/events/power.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/power.h b/include/trace/events/power.h index ed0919edd1b6..070be71b1aac 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -162,7 +162,7 @@ TRACE_EVENT(cpu_frequency_limits, TP_fast_assign( __entry->min_freq = min_freq; - __entry->max_freq = min_freq; + __entry->max_freq = max_freq; __entry->cpu_id = cpu_id; ), -- GitLab From 42a0b5740ec7d0ad4740a64b6cff35091f23a96a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 23 Mar 2016 08:32:23 -0700 Subject: [PATCH 0992/1442] ANDROID: fs: sdcardfs: Declare LOOKUP_CASE_INSENSITIVE unconditionally Attempts to build sdcardfs as module fail with fs/sdcardfs/lookup.c: In function '__sdcardfs_lookup': fs/sdcardfs/lookup.c:243:5: error: 'LOOKUP_CASE_INSENSITIVE' undeclared This occurs because the define is enclosed with #ifdef CONFIG_SDCARD_FS_CI_SEARCH. If SDCARD_FS_CI_SEARCH is configured to be built as module, this does not work. Alternatives would be to use #if IS_ENABLED(CONFIG_SDCARD_FS_CI_SEARCH), or to declare SDCARD_FS_CI_SEARCH as bool, but that does not work because the define is used unconditionally in the source. Note that LOOKUP_CASE_INSENSITIVE is only set but not evaluated in the current source code, so setting the flag has no real effect. Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0") Cc: Daniel Rosenberg Signed-off-by: Guenter Roeck --- include/linux/namei.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/namei.h b/include/linux/namei.h index b3cb8dbc623d..237c3fccb9eb 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -44,9 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 -#ifdef CONFIG_SDCARD_FS_CI_SEARCH #define LOOKUP_CASE_INSENSITIVE 0x8000 -#endif extern int path_pts(struct path *path); -- GitLab From 835d38b4b5c3f9b82c62c2e9e9e2bbb0ab49ae16 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 23 Mar 2016 12:09:25 -0700 Subject: [PATCH 0993/1442] ANDROID: inotify: Fix erroneous update of bit count Patch "vfs: add d_canonical_path for stacked filesystem support" erroneously updated the ALL_INOTIFY_BITS count. This changes it back Change-Id: Idb04edc736da276159d30f04c40cff9d6b1e070f --- fs/notify/inotify/inotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 6413928357be..c4c4504bb13c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -825,7 +825,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); - BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22); + BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); -- GitLab From 4665ac899d5381a08dd56161320101f4745a8237 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 23 Mar 2016 16:39:30 -0700 Subject: [PATCH 0994/1442] ANDROID: sdcardfs: remove effectless config option CONFIG_SDCARD_FS_CI_SEARCH only guards a define for LOOKUP_CASE_INSENSITIVE, which is never used in the kernel. Remove both, along with the option matching that supports it. Change-Id: I363a8f31de8ee7a7a934d75300cc9ba8176e2edf Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/Kconfig | 5 ----- fs/sdcardfs/lookup.c | 7 +------ fs/sdcardfs/main.c | 15 --------------- fs/sdcardfs/sdcardfs.h | 6 ------ include/linux/namei.h | 1 - 5 files changed, 1 insertion(+), 33 deletions(-) diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig index ab25f88ebb37..a1c103316ac7 100644 --- a/fs/sdcardfs/Kconfig +++ b/fs/sdcardfs/Kconfig @@ -11,8 +11,3 @@ config SDCARD_FS_FADV_NOACTIVE default y help Sdcardfs supports fadvise noactive mode. - -config SDCARD_FS_CI_SEARCH - tristate "sdcardfs case-insensitive search support" - depends on SDCARD_FS - default y diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index f80abcb6b467..a01b06a514fd 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -238,13 +238,8 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, lower_dir_mnt = lower_parent_path->mnt; /* Use vfs_path_lookup to check if the dentry exists or not */ - if (sbi->options.lower_fs == LOWER_FS_EXT4) { - err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, - LOOKUP_CASE_INSENSITIVE, &lower_path); - } else if (sbi->options.lower_fs == LOWER_FS_FAT) { - err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, &lower_path); - } /* no error: handle positive dentries */ if (!err) { diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 80aa355d801e..fa11a0458b84 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -41,7 +41,6 @@ static const match_table_t sdcardfs_tokens = { {Opt_fsgid, "fsgid=%u"}, {Opt_gid, "gid=%u"}, {Opt_debug, "debug"}, - {Opt_lower_fs, "lower_fs=%s"}, {Opt_mask, "mask=%u"}, {Opt_userid, "userid=%d"}, {Opt_multiuser, "multiuser"}, @@ -64,8 +63,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, opts->multiuser = false; opts->fs_user_id = 0; opts->gid = 0; - /* by default, we use LOWER_FS_EXT4 as lower fs type */ - opts->lower_fs = LOWER_FS_EXT4; /* by default, 0MB is reserved */ opts->reserved_mb = 0; @@ -113,18 +110,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_multiuser: opts->multiuser = true; break; - case Opt_lower_fs: - string_option = match_strdup(&args[0]); - if (!strcmp("ext4", string_option)) { - opts->lower_fs = LOWER_FS_EXT4; - } else if (!strcmp("fat", string_option)) { - opts->lower_fs = LOWER_FS_FAT; - } else { - kfree(string_option); - goto invalid_option; - } - kfree(string_option); - break; case Opt_reserved_mb: if (match_int(&args[0], &option)) return 0; diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 1b85f4e70324..f111f898b630 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -123,11 +123,6 @@ typedef enum { PERM_ANDROID_MEDIA, } perm_t; -typedef enum { - LOWER_FS_EXT4, - LOWER_FS_FAT, -} lower_fs_t; - struct sdcardfs_sb_info; struct sdcardfs_mount_options; @@ -191,7 +186,6 @@ struct sdcardfs_mount_options { gid_t fs_low_gid; userid_t fs_user_id; gid_t gid; - lower_fs_t lower_fs; mode_t mask; bool multiuser; unsigned int reserved_mb; diff --git a/include/linux/namei.h b/include/linux/namei.h index 237c3fccb9eb..a2866f6073e1 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -44,7 +44,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 -#define LOOKUP_CASE_INSENSITIVE 0x8000 extern int path_pts(struct path *path); -- GitLab From 671662bd634860a57ec84e487ab68b9d4c737092 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 24 Mar 2016 10:32:35 -0700 Subject: [PATCH 0995/1442] ANDROID: fs: Export d_absolute_path The 0-day build bot reports the following build error, seen if SDCARD_FS is built as module. ERROR: "d_absolute_path" undefined! Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0") Reported-by: Fengguang Wu Signed-off-by: Guenter Roeck --- fs/dcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dcache.c b/fs/dcache.c index 5c7cc953ac81..972741b98ee9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3184,6 +3184,7 @@ char *d_absolute_path(const struct path *path, return ERR_PTR(error); return res; } +EXPORT_SYMBOL(d_absolute_path); /* * same as __d_path but appends "(deleted)" for unlinked files. -- GitLab From 04c3dee9df3df605af66a99e30626c6e19bf9150 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 28 Mar 2016 15:00:20 -0700 Subject: [PATCH 0996/1442] ANDROID: sdcardfs: Remove unused code Change-Id: Ie97cba27ce44818ac56cfe40954f164ad44eccf6 --- fs/sdcardfs/main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index fa11a0458b84..a6522286d731 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -54,7 +54,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, char *p; substring_t args[MAX_OPT_ARGS]; int option; - char *string_option; /* by default, we use AID_MEDIA_RW as uid, gid */ opts->fs_low_uid = AID_MEDIA_RW; @@ -117,7 +116,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, break; /* unknown option */ default: -invalid_option: if (!silent) { printk( KERN_ERR "Unrecognized mount option \"%s\" " "or missing value", p); -- GitLab From cfdb66543ef07932cdeef0dee9f3e8ff779143ae Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 28 Mar 2016 16:00:34 -0700 Subject: [PATCH 0997/1442] ANDROID: sdcardfs: remove unneeded __init and __exit Change-Id: I2a2d45d52f891332174c3000e8681c5167c1564f --- fs/sdcardfs/packagelist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index ba3478d94107..10f0d6be718b 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -392,7 +392,7 @@ static struct configfs_subsystem sdcardfs_packages_subsys = { }, }; -static int __init configfs_sdcardfs_init(void) +static int configfs_sdcardfs_init(void) { int ret; struct configfs_subsystem *subsys = &sdcardfs_packages_subsys; @@ -408,7 +408,7 @@ static int __init configfs_sdcardfs_init(void) return ret; } -static void __exit configfs_sdcardfs_exit(void) +static void configfs_sdcardfs_exit(void) { configfs_unregister_subsystem(&sdcardfs_packages_subsys); } -- GitLab From abfb48618f89e6acbe11abc534feea69d2155d70 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 24 Mar 2016 10:39:14 -0700 Subject: [PATCH 0998/1442] ANDROID: mm: Export do_munmap The 0-day build bot reports the following build error, seen if SDCARD_FS is built as module. ERROR: "do_munmap" undefined! Fixes: 84a1b7d3d312 ("Included sdcardfs source code for kernel 3.0") Reported-by: Fengguang Wu Signed-off-by: Guenter Roeck --- mm/mmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/mmap.c b/mm/mmap.c index 590df3862840..143d62f5fc5c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2672,6 +2672,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) return 0; } +EXPORT_SYMBOL(do_munmap); int vm_munmap(unsigned long start, size_t len) { -- GitLab From dc99440ec4634b0aad1c11807c3cb6510b4f6a4b Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 30 Mar 2016 14:10:13 -0700 Subject: [PATCH 0999/1442] ANDROID: dm verity fec: add sysfs attribute fec/corrected Add a sysfs entry that allows user space to determine whether dm-verity has come across correctable errors on the underlying block device. Bug: 22655252 Bug: 27928374 Change-Id: I80547a2aa944af2fb9ffde002650482877ade31b Signed-off-by: Sami Tolvanen (cherry picked from commit 7911fad5f0a2cf5afc2215657219a21e6630e001) --- drivers/md/dm-verity-fec.c | 45 +++++++++++++++++++++++++++++++++++++- drivers/md/dm-verity-fec.h | 4 ++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 0f0eb8a3d922..0e79f38cb647 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -11,6 +11,7 @@ #include "dm-verity-fec.h" #include +#include #define DM_MSG_PREFIX "verity-fec" @@ -175,9 +176,11 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, if (r < 0 && neras) DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", v->data_dev->name, (unsigned long long)rsb, r); - else if (r > 0) + else if (r > 0) { DMWARN_LIMIT("%s: FEC %llu: corrected %d errors", v->data_dev->name, (unsigned long long)rsb, r); + atomic_add_unless(&v->fec->corrected, 1, INT_MAX); + } return r; } @@ -546,6 +549,7 @@ unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, void verity_fec_dtr(struct dm_verity *v) { struct dm_verity_fec *f = v->fec; + struct kobject *kobj = &f->kobj_holder.kobj; if (!verity_fec_is_enabled(v)) goto out; @@ -562,6 +566,12 @@ void verity_fec_dtr(struct dm_verity *v) if (f->dev) dm_put_device(v->ti, f->dev); + + if (kobj->state_initialized) { + kobject_put(kobj); + wait_for_completion(dm_get_completion_from_kobject(kobj)); + } + out: kfree(f); v->fec = NULL; @@ -650,6 +660,27 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, return 0; } +static ssize_t corrected_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct dm_verity_fec *f = container_of(kobj, struct dm_verity_fec, + kobj_holder.kobj); + + return sprintf(buf, "%d\n", atomic_read(&f->corrected)); +} + +static struct kobj_attribute attr_corrected = __ATTR_RO(corrected); + +static struct attribute *fec_attrs[] = { + &attr_corrected.attr, + NULL +}; + +static struct kobj_type fec_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_attrs = fec_attrs +}; + /* * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr. */ @@ -673,8 +704,10 @@ int verity_fec_ctr_alloc(struct dm_verity *v) */ int verity_fec_ctr(struct dm_verity *v) { + int r; struct dm_verity_fec *f = v->fec; struct dm_target *ti = v->ti; + struct mapped_device *md = dm_table_get_md(ti->table); u64 hash_blocks; if (!verity_fec_is_enabled(v)) { @@ -682,6 +715,16 @@ int verity_fec_ctr(struct dm_verity *v) return 0; } + /* Create a kobject and sysfs attributes */ + init_completion(&f->kobj_holder.completion); + + r = kobject_init_and_add(&f->kobj_holder.kobj, &fec_ktype, + &disk_to_dev(dm_disk(md))->kobj, "%s", "fec"); + if (r) { + ti->error = "Cannot create kobject"; + return r; + } + /* * FEC is computed over data blocks, possible metadata, and * hash blocks. In other words, FEC covers total of fec_blocks diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index 7fa0298b995e..a22098db08fd 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -12,6 +12,8 @@ #ifndef DM_VERITY_FEC_H #define DM_VERITY_FEC_H +#include "dm.h" +#include "dm-core.h" #include "dm-verity.h" #include @@ -48,6 +50,8 @@ struct dm_verity_fec { mempool_t *extra_pool; /* mempool for extra buffers */ mempool_t *output_pool; /* mempool for output */ struct kmem_cache *cache; /* cache for buffers */ + atomic_t corrected; /* corrected errors */ + struct dm_kobject_holder kobj_holder; /* for sysfs attributes */ }; /* per-bio data */ -- GitLab From 92f31300b40c85995e9a151a2aabb3665c88411e Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Thu, 28 Jan 2016 11:12:25 -0800 Subject: [PATCH 1000/1442] ANDROID: mmc: Add CONFIG_MMC_SIMULATE_MAX_SPEED When CONFIG_MMC_SIMULATE_MAX_SPEED is enabled, Expose max_read_speed, max_write_speed and cache_size default module parameters and sysfs controls to simulate a slow eMMC device. Default values are 0 (off), 0 (off) and 4 MB respectively. Signed-off-by: Mark Salyzyn Bug: 26976972 Change-Id: I342bfbd8b85f9b790e3f0e1e4e51a900ae07e05d --- Documentation/block/00-INDEX | 6 + Documentation/block/mmc-max-speed.txt | 38 ++++ drivers/mmc/card/Kconfig | 12 ++ drivers/mmc/card/block.c | 300 ++++++++++++++++++++++++++ drivers/mmc/card/queue.h | 8 + 5 files changed, 364 insertions(+) create mode 100644 Documentation/block/mmc-max-speed.txt diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index e55103ace382..a542b9f2a30d 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -30,3 +30,9 @@ switching-sched.txt - Switching I/O schedulers at runtime writeback_cache_control.txt - Control of volatile write back caches +mmc-max-speed.txt + - eMMC layer speed simulation, related to /sys/block/mmcblk*/ + attributes: + max_read_speed + max_write_speed + cache_size diff --git a/Documentation/block/mmc-max-speed.txt b/Documentation/block/mmc-max-speed.txt new file mode 100644 index 000000000000..3f052b9fb999 --- /dev/null +++ b/Documentation/block/mmc-max-speed.txt @@ -0,0 +1,38 @@ +eMMC Block layer simulation speed controls in /sys/block/mmcblk*/ +=============================================== + +Turned on with CONFIG_MMC_SIMULATE_MAX_SPEED which enables MMC device speed +limiting. Used to test and simulate the behavior of the system when +confronted with a slow MMC. + +Enables max_read_speed, max_write_speed and cache_size attributes and module +default parameters to control the write or read maximum KB/second speed +behaviors. + +NB: There is room for improving the algorithm for aspects tied directly to +eMMC specific behavior. For instance, wear leveling and stalls from an +exhausted erase pool. We would expect that if there was a need to provide +similar speed simulation controls to other types of block devices, aspects of +their behavior are modelled separately (e.g. head seek times, heat assist, +shingling and rotational latency). + +/sys/block/mmcblk0/max_read_speed: + +Number of KB/second reads allowed to the block device. Used to test and +simulate the behavior of the system when confronted with a slow reading MMC. +Set to 0 or "off" to place no speed limit. + +/sys/block/mmcblk0/max_write_speed: + +Number of KB/second writes allowed to the block device. Used to test and +simulate the behavior of the system when confronted with a slow writing MMC. +Set to 0 or "off" to place no speed limit. + +/sys/block/mmcblk0/cache_size: + +Number of MB of high speed memory or high speed SLC cache expected on the +eMMC device being simulated. Used to help simulate the write-back behavior +more accurately. The assumption is the cache has no delay, but draws down +in the background to the MLC/TLC primary store at the max_write_speed rate. +Any write speed delays will show up when the cache is full, or when an I/O +request to flush is issued. diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig index 5562308699bc..6142ec1b9dfb 100644 --- a/drivers/mmc/card/Kconfig +++ b/drivers/mmc/card/Kconfig @@ -68,3 +68,15 @@ config MMC_TEST This driver is only of interest to those developing or testing a host driver. Most people should say N here. + +config MMC_SIMULATE_MAX_SPEED + bool "Turn on maximum speed control per block device" + depends on MMC_BLOCK + help + Say Y here to enable MMC device speed limiting. Used to test and + simulate the behavior of the system when confronted with a slow MMC. + + Enables max_read_speed, max_write_speed and cache_size attributes to + control the write or read maximum KB/second speed behaviors. + + If unsure, say N here. diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 709a872ed484..817fcf8c0ac6 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -287,6 +287,250 @@ static ssize_t force_ro_store(struct device *dev, struct device_attribute *attr, return ret; } +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + +static int max_read_speed, max_write_speed, cache_size = 4; + +module_param(max_read_speed, int, S_IRUSR | S_IRGRP); +MODULE_PARM_DESC(max_read_speed, "maximum KB/s read speed 0=off"); +module_param(max_write_speed, int, S_IRUSR | S_IRGRP); +MODULE_PARM_DESC(max_write_speed, "maximum KB/s write speed 0=off"); +module_param(cache_size, int, S_IRUSR | S_IRGRP); +MODULE_PARM_DESC(cache_size, "MB high speed memory or SLC cache"); + +/* + * helper macros and expectations: + * size - unsigned long number of bytes + * jiffies - unsigned long HZ timestamp difference + * speed - unsigned KB/s transfer rate + */ +#define size_and_speed_to_jiffies(size, speed) \ + ((size) * HZ / (speed) / 1024UL) +#define jiffies_and_speed_to_size(jiffies, speed) \ + (((speed) * (jiffies) * 1024UL) / HZ) +#define jiffies_and_size_to_speed(jiffies, size) \ + ((size) * HZ / (jiffies) / 1024UL) + +/* Limits to report warning */ +/* jiffies_and_size_to_speed(10*HZ, queue_max_hw_sectors(q) * 512UL) ~ 25 */ +#define MIN_SPEED(q) 250 /* 10 times faster than a floppy disk */ +#define MAX_SPEED(q) jiffies_and_size_to_speed(1, queue_max_sectors(q) * 512UL) + +#define speed_valid(speed) ((speed) > 0) + +static const char off[] = "off\n"; + +static int max_speed_show(int speed, char *buf) +{ + if (speed) + return scnprintf(buf, PAGE_SIZE, "%uKB/s\n", speed); + else + return scnprintf(buf, PAGE_SIZE, off); +} + +static int max_speed_store(const char *buf, struct request_queue *q) +{ + unsigned int limit, set = 0; + + if (!strncasecmp(off, buf, sizeof(off) - 2)) + return set; + if (kstrtouint(buf, 0, &set) || (set > INT_MAX)) + return -EINVAL; + if (set == 0) + return set; + limit = MAX_SPEED(q); + if (set > limit) + pr_warn("max speed %u ineffective above %u\n", set, limit); + limit = MIN_SPEED(q); + if (set < limit) + pr_warn("max speed %u painful below %u\n", set, limit); + return set; +} + +static ssize_t max_write_speed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + int ret = max_speed_show(atomic_read(&md->queue.max_write_speed), buf); + + mmc_blk_put(md); + return ret; +} + +static ssize_t max_write_speed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + int set = max_speed_store(buf, md->queue.queue); + + if (set < 0) { + mmc_blk_put(md); + return set; + } + + atomic_set(&md->queue.max_write_speed, set); + mmc_blk_put(md); + return count; +} + +static const DEVICE_ATTR(max_write_speed, S_IRUGO | S_IWUSR, + max_write_speed_show, max_write_speed_store); + +static ssize_t max_read_speed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + int ret = max_speed_show(atomic_read(&md->queue.max_read_speed), buf); + + mmc_blk_put(md); + return ret; +} + +static ssize_t max_read_speed_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + int set = max_speed_store(buf, md->queue.queue); + + if (set < 0) { + mmc_blk_put(md); + return set; + } + + atomic_set(&md->queue.max_read_speed, set); + mmc_blk_put(md); + return count; +} + +static const DEVICE_ATTR(max_read_speed, S_IRUGO | S_IWUSR, + max_read_speed_show, max_read_speed_store); + +static ssize_t cache_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); + struct mmc_queue *mq = &md->queue; + int cache_size = atomic_read(&mq->cache_size); + int ret; + + if (!cache_size) + ret = scnprintf(buf, PAGE_SIZE, off); + else { + int speed = atomic_read(&mq->max_write_speed); + + if (!speed_valid(speed)) + ret = scnprintf(buf, PAGE_SIZE, "%uMB\n", cache_size); + else { /* We accept race between cache_jiffies and cache_used */ + unsigned long size = jiffies_and_speed_to_size( + jiffies - mq->cache_jiffies, speed); + long used = atomic_long_read(&mq->cache_used); + + if (size >= used) + size = 0; + else + size = (used - size) * 100 / cache_size + / 1024UL / 1024UL; + + ret = scnprintf(buf, PAGE_SIZE, "%uMB %lu%% used\n", + cache_size, size); + } + } + + mmc_blk_put(md); + return ret; +} + +static ssize_t cache_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_blk_data *md; + unsigned int set = 0; + + if (strncasecmp(off, buf, sizeof(off) - 2) + && (kstrtouint(buf, 0, &set) || (set > INT_MAX))) + return -EINVAL; + + md = mmc_blk_get(dev_to_disk(dev)); + atomic_set(&md->queue.cache_size, set); + mmc_blk_put(md); + return count; +} + +static const DEVICE_ATTR(cache_size, S_IRUGO | S_IWUSR, + cache_size_show, cache_size_store); + +/* correct for write-back */ +static long mmc_blk_cache_used(struct mmc_queue *mq, unsigned long waitfor) +{ + long used = 0; + int speed = atomic_read(&mq->max_write_speed); + + if (speed_valid(speed)) { + unsigned long size = jiffies_and_speed_to_size( + waitfor - mq->cache_jiffies, speed); + used = atomic_long_read(&mq->cache_used); + + if (size >= used) + used = 0; + else + used -= size; + } + + atomic_long_set(&mq->cache_used, used); + mq->cache_jiffies = waitfor; + + return used; +} + +static void mmc_blk_simulate_delay( + struct mmc_queue *mq, + struct request *req, + unsigned long waitfor) +{ + int max_speed; + + if (!req) + return; + + max_speed = (rq_data_dir(req) == READ) + ? atomic_read(&mq->max_read_speed) + : atomic_read(&mq->max_write_speed); + if (speed_valid(max_speed)) { + unsigned long bytes = blk_rq_bytes(req); + + if (rq_data_dir(req) != READ) { + int cache_size = atomic_read(&mq->cache_size); + + if (cache_size) { + unsigned long size = cache_size * 1024L * 1024L; + long used = mmc_blk_cache_used(mq, waitfor); + + used += bytes; + atomic_long_set(&mq->cache_used, used); + bytes = 0; + if (used > size) + bytes = used - size; + } + } + waitfor += size_and_speed_to_jiffies(bytes, max_speed); + if (time_is_after_jiffies(waitfor)) { + long msecs = jiffies_to_msecs(waitfor - jiffies); + + if (likely(msecs > 0)) + msleep(msecs); + } + } +} + +#else + +#define mmc_blk_simulate_delay(mq, req, waitfor) + +#endif + static int mmc_blk_open(struct block_device *bdev, fmode_t mode) { struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk); @@ -1284,6 +1528,23 @@ static int mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) if (ret) ret = -EIO; +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + else if (atomic_read(&mq->cache_size)) { + long used = mmc_blk_cache_used(mq, jiffies); + + if (used) { + int speed = atomic_read(&mq->max_write_speed); + + if (speed_valid(speed)) { + unsigned long msecs = jiffies_to_msecs( + size_and_speed_to_jiffies( + used, speed)); + if (msecs) + msleep(msecs); + } + } + } +#endif blk_end_request_all(req, ret); return ret ? 0 : 1; @@ -1965,6 +2226,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) struct mmc_async_req *areq; const u8 packed_nr = 2; u8 reqs = 0; +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + unsigned long waitfor = jiffies; +#endif if (!rqc && !mq->mqrq_prev->req) return 0; @@ -2015,6 +2279,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) */ mmc_blk_reset_success(md, type); + mmc_blk_simulate_delay(mq, rqc, waitfor); + if (mmc_packed_cmd(mq_rq->cmd_type)) { ret = mmc_blk_end_packed_req(mq_rq); break; @@ -2437,6 +2703,14 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md) card->ext_csd.boot_ro_lockable) device_remove_file(disk_to_dev(md->disk), &md->power_ro_lock); +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + device_remove_file(disk_to_dev(md->disk), + &dev_attr_max_write_speed); + device_remove_file(disk_to_dev(md->disk), + &dev_attr_max_read_speed); + device_remove_file(disk_to_dev(md->disk), + &dev_attr_cache_size); +#endif del_gendisk(md->disk); } @@ -2471,6 +2745,24 @@ static int mmc_add_disk(struct mmc_blk_data *md) ret = device_create_file(disk_to_dev(md->disk), &md->force_ro); if (ret) goto force_ro_fail; +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + atomic_set(&md->queue.max_write_speed, max_write_speed); + ret = device_create_file(disk_to_dev(md->disk), + &dev_attr_max_write_speed); + if (ret) + goto max_write_speed_fail; + atomic_set(&md->queue.max_read_speed, max_read_speed); + ret = device_create_file(disk_to_dev(md->disk), + &dev_attr_max_read_speed); + if (ret) + goto max_read_speed_fail; + atomic_set(&md->queue.cache_size, cache_size); + atomic_long_set(&md->queue.cache_used, 0); + md->queue.cache_jiffies = jiffies; + ret = device_create_file(disk_to_dev(md->disk), &dev_attr_cache_size); + if (ret) + goto cache_size_fail; +#endif if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) && card->ext_csd.boot_ro_lockable) { @@ -2495,6 +2787,14 @@ static int mmc_add_disk(struct mmc_blk_data *md) return ret; power_ro_lock_fail: +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + device_remove_file(disk_to_dev(md->disk), &dev_attr_cache_size); +cache_size_fail: + device_remove_file(disk_to_dev(md->disk), &dev_attr_max_read_speed); +max_read_speed_fail: + device_remove_file(disk_to_dev(md->disk), &dev_attr_max_write_speed); +max_write_speed_fail: +#endif device_remove_file(disk_to_dev(md->disk), &md->force_ro); force_ro_fail: del_gendisk(md->disk); diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h index 342f1e3f301e..fe58d31cbc7e 100644 --- a/drivers/mmc/card/queue.h +++ b/drivers/mmc/card/queue.h @@ -62,6 +62,14 @@ struct mmc_queue { struct mmc_queue_req mqrq[2]; struct mmc_queue_req *mqrq_cur; struct mmc_queue_req *mqrq_prev; +#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED + atomic_t max_write_speed; + atomic_t max_read_speed; + atomic_t cache_size; + /* i/o tracking */ + atomic_long_t cache_used; + unsigned long cache_jiffies; +#endif }; extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, -- GitLab From 5b5ab94817f968509bd5d048569e7ad0186e724f Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 1 Oct 2015 10:44:36 +0530 Subject: [PATCH 1001/1442] ANDROID: netfilter: xt_qtaguid: seq_printf fixes Update seq_printf() usage in xt_qtaguid to align with changes from mainline commit 6798a8caaf64 "fs/seq_file: convert int seq_vprint/seq_printf/etc... returns to void". Signed-off-by: Amit Pundir --- net/netfilter/xt_qtaguid.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 04bb081adde8..e1442bfb668d 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -2543,7 +2543,6 @@ static void pp_stats_header(struct seq_file *m) static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry, int cnt_set) { - int ret; struct data_counters *cnts; tag_t tag = ts_entry->tn.tag; uid_t stat_uid = get_uid_from_tag(tag); @@ -2562,7 +2561,7 @@ static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry, } ppi->item_index++; cnts = &ts_entry->counters; - ret = seq_printf(m, "%d %s 0x%llx %u %u " + seq_printf(m, "%d %s 0x%llx %u %u " "%llu %llu " "%llu %llu " "%llu %llu " @@ -2592,7 +2591,7 @@ static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry, cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); - return ret ?: 1; + return seq_has_overflowed(m) ? -ENOSPC : 1; } static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry) -- GitLab From 973117c6617fdaed5ad7add548dec15962074198 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 22 Apr 2016 00:00:14 -0700 Subject: [PATCH 1002/1442] ANDROID: vfs: change d_canonical_path to take two paths bug: 23904372 Change-Id: I4a686d64b6de37decf60019be1718e1d820193e6 Signed-off-by: Daniel Rosenberg --- fs/notify/inotify/inotify_user.c | 2 +- fs/sdcardfs/dentry.c | 6 +++++- include/linux/dcache.h | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index c4c4504bb13c..4dc09da062c6 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -746,7 +746,7 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, /* support stacked filesystems */ if(path.dentry && path.dentry->d_op) { if (path.dentry->d_op->d_canonical_path) { - path.dentry->d_op->d_canonical_path(path.dentry, &alteredpath); + path.dentry->d_op->d_canonical_path(&path, &alteredpath); canonical_path = &alteredpath; path_put(&path); } diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index ba165ef11e27..971928ab6c21 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -172,11 +172,15 @@ static int sdcardfs_cmp_ci(const struct dentry *parent, return 1; } +static void sdcardfs_canonical_path(const struct path *path, struct path *actual_path) { + sdcardfs_get_real_lower(path->dentry, actual_path); +} + const struct dentry_operations sdcardfs_ci_dops = { .d_revalidate = sdcardfs_d_revalidate, .d_release = sdcardfs_d_release, .d_hash = sdcardfs_hash_ci, .d_compare = sdcardfs_cmp_ci, - .d_canonical_path = sdcardfs_get_real_lower, + .d_canonical_path = sdcardfs_canonical_path, }; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8f369acc1a94..9b0477ea5dd9 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -142,7 +142,7 @@ struct dentry_operations { int (*d_manage)(struct dentry *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *, unsigned int); - void (*d_canonical_path)(const struct dentry *, struct path *); + void (*d_canonical_path)(const struct path *, struct path *); } ____cacheline_aligned; /* -- GitLab From fac99a7b0010cafb5ba0df8e646695c3ae651678 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 22 Apr 2016 00:00:48 -0700 Subject: [PATCH 1003/1442] ANDROID: fuse: Add support for d_canonical_path Allows FUSE to report to inotify that it is acting as a layered filesystem. The userspace component returns a string representing the location of the underlying file. If the string cannot be resolved into a path, the top level path is returned instead. bug: 23904372 Change-Id: Iabdca0bbedfbff59e9c820c58636a68ef9683d9f Signed-off-by: Daniel Rosenberg --- fs/fuse/dev.c | 5 +++++ fs/fuse/dir.c | 46 +++++++++++++++++++++++++++++++++++++++ fs/fuse/fuse_i.h | 3 +++ include/uapi/linux/fuse.h | 1 + 4 files changed, 55 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index a9e1d708e1e9..e920bf06c1fa 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1878,6 +1879,10 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, cs->move_pages = 0; err = copy_out_args(cs, &req->out, nbytes); + if (req->in.h.opcode == FUSE_CANONICAL_PATH) { + req->out.h.error = kern_path((char *)req->out.args[0].value, 0, + req->canonical_path); + } fuse_copy_finish(cs); spin_lock(&fpq->lock); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 096f79997f75..340a6185c455 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -262,6 +262,50 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) goto out; } +/* + * Get the canonical path. Since we must translate to a path, this must be done + * in the context of the userspace daemon, however, the userspace daemon cannot + * look up paths on its own. Instead, we handle the lookup as a special case + * inside of the write request. + */ +static void fuse_dentry_canonical_path(const struct path *path, struct path *canonical_path) { + struct inode *inode = path->dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + int err; + char *path_name; + + req = fuse_get_req(fc, 1); + err = PTR_ERR(req); + if (IS_ERR(req)) + goto default_path; + + path_name = (char*)__get_free_page(GFP_KERNEL); + if (!path_name) { + fuse_put_request(fc, req); + goto default_path; + } + + req->in.h.opcode = FUSE_CANONICAL_PATH; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 0; + req->out.numargs = 1; + req->out.args[0].size = PATH_MAX; + req->out.args[0].value = path_name; + req->canonical_path = canonical_path; + req->out.argvar = 1; + fuse_request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + free_page((unsigned long)path_name); + if (!err) + return; +default_path: + canonical_path->dentry = path->dentry; + canonical_path->mnt = path->mnt; + path_get(canonical_path); +} + static int invalid_nodeid(u64 nodeid) { return !nodeid || nodeid == FUSE_ROOT_ID; @@ -284,11 +328,13 @@ const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, + .d_canonical_path = fuse_dentry_canonical_path, }; const struct dentry_operations fuse_root_dentry_operations = { .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, + .d_canonical_path = fuse_dentry_canonical_path, }; int fuse_valid_type(int m) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 91307940c8ac..6b30a12f898f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -368,6 +368,9 @@ struct fuse_req { /** Inode used in the request or NULL */ struct inode *inode; + /** Path used for completing d_canonical_path */ + struct path *canonical_path; + /** AIO control block */ struct fuse_io_priv *io; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 42fa977e3b14..093237817ed0 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -375,6 +375,7 @@ enum fuse_opcode { FUSE_READDIRPLUS = 44, FUSE_RENAME2 = 45, FUSE_LSEEK = 46, + FUSE_CANONICAL_PATH= 2016, /* CUSE specific operations */ CUSE_INIT = 4096, -- GitLab From 551dae64b75fc0f5a2bf08713f50186ce7b45ea9 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 22 Apr 2016 17:12:57 -0700 Subject: [PATCH 1004/1442] ANDROID: xt_qtaguid: Fix panic caused by synack processing In upstream commit ca6fb06518836ef9b65dc0aac02ff97704d52a05 (tcp: attach SYNACK messages to request sockets instead of listener) http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ca6fb0651883 The building of synack messages was changed, which made it so the skb->sk points to a casted request_sock. This is problematic, as there is no sk_socket in a request_sock. So when the qtaguid_mt function tries to access the sk->sk_socket, it accesses uninitialized memory. After looking at how other netfilter implementations handle this, I realized there was a skb_to_full_sk() helper added, which the xt_qtaguid code isn't yet using. This patch adds its use, and resovles panics seen when accessing uninitialzed memory when processing synack packets. Reported-by: YongQin Liu Signed-off-by: John Stultz --- net/netfilter/xt_qtaguid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index e1442bfb668d..822dc3c3bce1 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1689,7 +1689,7 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) /* default: Fall through and do UID releated work */ } - sk = skb->sk; + sk = skb_to_full_sk(skb); /* * When in TCP_TIME_WAIT the sk is not a "struct sock" but * "struct inet_timewait_sock" which is missing fields. -- GitLab From 93f1d0b105ea0ad9e972fb5dc05d7f6059bf3080 Mon Sep 17 00:00:00 2001 From: Yongqin Liu Date: Thu, 28 Apr 2016 13:53:36 +0800 Subject: [PATCH 1005/1442] ANDROID: quick selinux support for tracefs Here is just the quick fix for tracefs with selinux. just add tracefs to the list of whitelisted filesystem types in selinux_is_sblabel_mnt(), but the right fix would be to generalize this logic as described in the last item on the todo list, https://bitbucket.org/seandroid/wiki/wiki/ToDo Change-Id: I2aa803ccffbcd2802a7287514da7648e6b364157 Signed-off-by: Yongqin Liu --- security/selinux/hooks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 09fd6108e421..24bd84d26123 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -491,6 +491,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) !strcmp(sb->s_type->name, "sysfs") || !strcmp(sb->s_type->name, "pstore") || !strcmp(sb->s_type->name, "debugfs") || + !strcmp(sb->s_type->name, "tracefs") || !strcmp(sb->s_type->name, "rootfs"); } -- GitLab From 35667e6f7d573496c75cb08d5d98429feae7ff47 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 23 Mar 2016 13:18:03 -0700 Subject: [PATCH 1006/1442] ANDROID: usb: dual-role: make stub functions inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_DUAL_ROLE_USB_INTF is disabled but the exported functions are referenced, the build will result in warnings such as: In file included from include/linux/usb/class-dual-role.h:112:13: warning: ‘dual_role_instance_changed’ defined but not used [-Wunused-function] These stub functions should be static inline. Change-Id: I5a9ef58dca32306fac5a4c7f28cdaa36fa8ae078 Signed-off-by: Jack Pham (cherry picked from commit 2d152dbb0743526b21d6bbefe097f874c027f860) (cherry picked from commit 8ad66cafaa10e6ba94ff79a8dbc2cc437c6bfe93) --- include/linux/usb/class-dual-role.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/usb/class-dual-role.h b/include/linux/usb/class-dual-role.h index af42ed34944a..c6df2238012e 100644 --- a/include/linux/usb/class-dual-role.h +++ b/include/linux/usb/class-dual-role.h @@ -109,18 +109,19 @@ extern int dual_role_property_is_writeable(struct dual_role_phy_instance enum dual_role_property prop); extern void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role); #else /* CONFIG_DUAL_ROLE_USB_INTF */ -static void dual_role_instance_changed(struct dual_role_phy_instance +static inline void dual_role_instance_changed(struct dual_role_phy_instance *dual_role){} -static struct dual_role_phy_instance *__must_check +static inline struct dual_role_phy_instance *__must_check devm_dual_role_instance_register(struct device *parent, const struct dual_role_phy_desc *desc) { return ERR_PTR(-ENOSYS); } -static void devm_dual_role_instance_unregister(struct device *dev, +static inline void devm_dual_role_instance_unregister(struct device *dev, struct dual_role_phy_instance *dual_role){} -static void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role) +static inline void *dual_role_get_drvdata(struct dual_role_phy_instance + *dual_role) { return ERR_PTR(-ENOSYS); } -- GitLab From caff55cc471458c6e5a5114db7313330fddb51a3 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 4 May 2016 13:51:38 -0700 Subject: [PATCH 1007/1442] ANDROID: fiq_debugger: Add option to apply uart overlay by FIQ_DEBUGGER_UART_OVERLAY fiq_debugger is taking over uart, so it is necessary to disable original uart in DT file. It can be done manually or by overlay. Change-Id: I9f50ec15b0e22e602d73b9f745fc8666f8925d09 Signed-off-by: Dmitry Shmidt --- drivers/staging/android/fiq_debugger/Kconfig | 9 ++++++ .../android/fiq_debugger/fiq_debugger.c | 30 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/drivers/staging/android/fiq_debugger/Kconfig b/drivers/staging/android/fiq_debugger/Kconfig index 56f7f999377e..60fc224d4efc 100644 --- a/drivers/staging/android/fiq_debugger/Kconfig +++ b/drivers/staging/android/fiq_debugger/Kconfig @@ -42,6 +42,15 @@ config FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE If enabled, this puts the fiq debugger into console mode by default. Otherwise, the fiq debugger will start out in debug mode. +config FIQ_DEBUGGER_UART_OVERLAY + bool "Install uart DT overlay" + depends on FIQ_DEBUGGER + select OF_OVERLAY + default n + help + If enabled, fiq debugger is calling fiq_debugger_uart_overlay() + that will apply overlay uart_overlay@0 to disable proper uart. + config FIQ_WATCHDOG bool select FIQ_DEBUGGER diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index 7f056831dbff..0c558331a3d2 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -39,6 +39,10 @@ #include #endif +#ifdef CONFIG_FIQ_DEBUGGER_UART_OVERLAY +#include +#endif + #include #include "fiq_debugger.h" @@ -1201,10 +1205,36 @@ static struct platform_driver fiq_debugger_driver = { }, }; +#if defined(CONFIG_FIQ_DEBUGGER_UART_OVERLAY) +int fiq_debugger_uart_overlay(void) +{ + struct device_node *onp = of_find_node_by_path("/uart_overlay@0"); + int ret; + + if (!onp) { + pr_err("serial_debugger: uart overlay not found\n"); + return -ENODEV; + } + + ret = of_overlay_create(onp); + if (ret < 0) { + pr_err("serial_debugger: fail to create overlay: %d\n", ret); + of_node_put(onp); + return ret; + } + + pr_info("serial_debugger: uart overlay applied\n"); + return 0; +} +#endif + static int __init fiq_debugger_init(void) { #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) fiq_debugger_tty_init(); +#endif +#if defined(CONFIG_FIQ_DEBUGGER_UART_OVERLAY) + fiq_debugger_uart_overlay(); #endif return platform_driver_register(&fiq_debugger_driver); } -- GitLab From ddcf65e87cf39ff11867a07e8b63bdf7b9313b9f Mon Sep 17 00:00:00 2001 From: Jimmy Perchet Date: Mon, 9 May 2016 10:32:04 -0700 Subject: [PATCH 1008/1442] FROMLIST: wlcore: Disable filtering in AP role When you configure (set it up) a STA interface, the driver install a multicast filter. This is normal behavior, when one application subscribe to multicast address the filter is updated. When Access Point interface is configured, there is no filter installation and the "filter update" path is disabled in the driver. The problem happens when you switch an interface from STA type to AP type. The filter is installed but there are no means to update it. Change-Id: Ied22323af831575303abd548574918baa9852dd0 Signed-off-by: Dmitry Shmidt --- drivers/net/wireless/ti/wlcore/init.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ti/wlcore/init.c b/drivers/net/wireless/ti/wlcore/init.c index d0b7734030ef..b7974b4dbb34 100644 --- a/drivers/net/wireless/ti/wlcore/init.c +++ b/drivers/net/wireless/ti/wlcore/init.c @@ -549,6 +549,11 @@ static int wl12xx_init_ap_role(struct wl1271 *wl, struct wl12xx_vif *wlvif) { int ret; + /* Disable filtering */ + ret = wl1271_acx_group_address_tbl(wl, wlvif, false, NULL, 0); + if (ret < 0) + return ret; + ret = wl1271_acx_ap_max_tx_retry(wl, wlvif); if (ret < 0) return ret; -- GitLab From 023f8892563b9332d40762db521585838d083fd9 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 11 May 2016 11:01:02 -0700 Subject: [PATCH 1009/1442] ANDROID: fiq_debugger: Add fiq_debugger.disable option This change allows to use same kernel image with different console options for uart and fiq_debugger. If fiq_debugger.disable will be set to 1/y/Y, fiq_debugger will not be initialized. Change-Id: I71fda54f5f863d13b1437b1f909e52dd375d002d Signed-off-by: Dmitry Shmidt --- drivers/staging/android/fiq_debugger/fiq_debugger.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index 0c558331a3d2..b132cff14f01 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -123,11 +123,13 @@ static bool initial_console_enable; #endif static bool fiq_kgdb_enable; +static bool fiq_debugger_disable; module_param_named(no_sleep, initial_no_sleep, bool, 0644); module_param_named(debug_enable, initial_debug_enable, bool, 0644); module_param_named(console_enable, initial_console_enable, bool, 0644); module_param_named(kgdb_enable, fiq_kgdb_enable, bool, 0644); +module_param_named(disable, fiq_debugger_disable, bool, 0644); #ifdef CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON static inline @@ -1230,6 +1232,10 @@ int fiq_debugger_uart_overlay(void) static int __init fiq_debugger_init(void) { + if (fiq_debugger_disable) { + pr_err("serial_debugger: disabled\n"); + return -ENODEV; + } #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) fiq_debugger_tty_init(); #endif -- GitLab From 6978766a70bc25db3f5e3acbd4f070e45f482630 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 12 May 2016 11:17:52 -0700 Subject: [PATCH 1010/1442] ANDROID: xt_qtaguid: Fix panic caused by processing non-full socket. In an issue very similar to 4e461c777e3 (xt_qtaguid: Fix panic caused by synack processing), we were seeing panics on occasion in testing. In this case, it was the same issue, but caused by a different call path, as the sk being returned from qtaguid_find_sk() was not a full socket. Resulting in the sk->sk_socket deref to fail. This patch adds an extra check to ensure the sk being retuned is a full socket, and if not it returns NULL. Reported-by: Milosz Wasilewski Signed-off-by: John Stultz --- net/netfilter/xt_qtaguid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index 822dc3c3bce1..e2e7d54f9bb1 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1606,7 +1606,7 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb, * When in TCP_TIME_WAIT the sk is not a "struct sock" but * "struct inet_timewait_sock" which is missing fields. */ - if (sk->sk_state == TCP_TIME_WAIT) { + if (!sk_fullsock(sk) || sk->sk_state == TCP_TIME_WAIT) { sock_gen_put(sk); sk = NULL; } -- GitLab From 4d6ce9643d4d3e731807a433fffb38239c22bbd5 Mon Sep 17 00:00:00 2001 From: Winter Wang Date: Fri, 20 May 2016 11:05:00 +0800 Subject: [PATCH 1011/1442] ANDROID: usb: gadget: f_midi: set fi->f to NULL when free f_midi function fi->f is set in f_midi's alloc_func, need to clean this to NULL in free_func, otherwise on ConfigFS's function switch, midi->usb_function it self is freed, fi->f will be a wild pointer and run into below kernel panic: --------------- [ 58.950628] Unable to handle kernel paging request at virtual address 63697664 [ 58.957869] pgd = c0004000 [ 58.960583] [63697664] *pgd=00000000 [ 58.964185] Internal error: Oops: 80000005 [#1] PREEMPT SMP ARM [ 58.970111] Modules linked in: [ 58.973191] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.15-03504-g34c857c-dirty #89 [ 58.981024] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 58.987557] task: c110bd70 ti: c1100000 task.ti: c1100000 [ 58.992962] PC is at 0x63697664 [ 58.996120] LR is at android_setup+0x78/0x138 <..snip..> [ 60.044980] 1fc0: ffffffff ffffffff c1000684 00000000 00000000 c108ecd0 c11f7294 c11039c0 [ 60.053181] 1fe0: c108eccc c110d148 1000406a 412fc09a 00000000 1000807c 00000000 00000000 [ 60.061420] [] (android_setup) from [] (udc_irq+0x758/0x1034) [ 60.068951] [] (udc_irq) from [] (handle_irq_event_percpu+0x50/0x254) [ 60.077165] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x3c/0x5c) [ 60.086072] [] (handle_irq_event) from [] (handle_fasteoi_irq+0xe0/0x198) [ 60.094630] [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x2c/0x3c) [ 60.103271] [] (generic_handle_irq) from [] (__handle_domain_irq+0x7c/0xec) [ 60.112000] [] (__handle_domain_irq) from [] (gic_handle_irq+0x24/0x5c) -------------- Signed-off-by: Winter Wang --- drivers/usb/gadget/function/f_midi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index f23a6f7cd347..a832d271739c 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -1265,6 +1265,7 @@ static void f_midi_free(struct usb_function *f) mutex_lock(&opts->lock); kfifo_free(&midi->in_req_fifo); kfree(midi); + opts->func_inst.f = NULL; --opts->refcnt; mutex_unlock(&opts->lock); } -- GitLab From e23ef5fc525fb11fccc07b49466e3e5f208fc2b5 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 22 Apr 2016 17:23:29 +0800 Subject: [PATCH 1012/1442] ANDROID: arm64: add option to build Image-dtb Some bootloaders couldn't decompress Image.gz-dtb. Change-Id: I698cd0c4ee6894e8d0655d88f3ecf4826c28a645 Signed-off-by: Haojian Zhuang Signed-off-by: John Stultz Signed-off-by: Dmitry Shmidt --- arch/arm64/Makefile | 2 +- arch/arm64/boot/Makefile | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 085f79235be0..bba8d2cc7222 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -127,7 +127,7 @@ dtbs: prepare scripts dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(boot)/dts -Image.gz-dtb: vmlinux scripts dtbs +Image-dtb Image.gz-dtb: vmlinux scripts dtbs $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ PHONY += vdso_install diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 5bb65a9e5d13..2c8cb864315e 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -34,6 +34,9 @@ $(obj)/Image: vmlinux FORCE $(obj)/Image.bz2: $(obj)/Image FORCE $(call if_changed,bzip2) +$(obj)/Image-dtb: $(obj)/Image $(DTB_OBJS) FORCE + $(call if_changed,cat) + $(obj)/Image.gz: $(obj)/Image FORCE $(call if_changed,gzip) -- GitLab From 56b70ac2447f0b900c0432451f706bbff5dc261f Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 24 May 2016 14:41:57 -0700 Subject: [PATCH 1013/1442] ANDROID: ARM64: Ignore Image-dtb from git point of view Change-Id: I5bbf1db90f28ea956383b4a5d91ad508eea656dc Signed-off-by: Dmitry Shmidt --- arch/arm64/boot/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index eb3551131b1e..34e35209fc2e 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -1,3 +1,4 @@ Image +Image-dtb Image.gz Image.gz-dtb -- GitLab From d28f856d2cf735bd10014a73373cf7544cec20d6 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Sun, 29 May 2016 14:22:32 -0700 Subject: [PATCH 1014/1442] FROMLIST: security,perf: Allow further restriction of perf_event_open When kernel.perf_event_open is set to 3 (or greater), disallow all access to performance events by users without CAP_SYS_ADMIN. Add a Kconfig symbol CONFIG_SECURITY_PERF_EVENTS_RESTRICT that makes this value the default. This is based on a similar feature in grsecurity (CONFIG_GRKERNSEC_PERF_HARDEN). This version doesn't include making the variable read-only. It also allows enabling further restriction at run-time regardless of whether the default is changed. https://lkml.org/lkml/2016/1/11/587 Signed-off-by: Ben Hutchings Bug: 29054680 Change-Id: Iff5bff4fc1042e85866df9faa01bce8d04335ab8 --- Documentation/sysctl/kernel.txt | 4 +++- include/linux/perf_event.h | 5 +++++ kernel/events/core.c | 8 ++++++++ security/Kconfig | 9 +++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index ffab8b5caa60..52daff6d09fb 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -659,12 +659,14 @@ allowed to execute. perf_event_paranoid: Controls use of the performance events system by unprivileged -users (without CAP_SYS_ADMIN). The default value is 2. +users (without CAP_SYS_ADMIN). The default value is 3 if +CONFIG_SECURITY_PERF_EVENTS_RESTRICT is set, or 2 otherwise. -1: Allow use of (almost) all events by all users >=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK >=1: Disallow CPU event access by users without CAP_SYS_ADMIN >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN +>=3: Disallow all event access by users without CAP_SYS_ADMIN ============================================================== diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4741ecdb9817..531b8b10fcda 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1168,6 +1168,11 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static inline bool perf_paranoid_any(void) +{ + return sysctl_perf_event_paranoid > 2; +} + static inline bool perf_paranoid_tracepoint_raw(void) { return sysctl_perf_event_paranoid > -1; diff --git a/kernel/events/core.c b/kernel/events/core.c index 02c8421f8c01..ede107cc5836 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -389,8 +389,13 @@ static struct srcu_struct pmus_srcu; * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv + * 3 - disallow all unpriv perf event use */ +#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT +int sysctl_perf_event_paranoid __read_mostly = 3; +#else int sysctl_perf_event_paranoid __read_mostly = 2; +#endif /* Minimum for 512 kiB + 1 user control page */ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ @@ -9533,6 +9538,9 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; + if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; diff --git a/security/Kconfig b/security/Kconfig index 118f4549404e..59aea7df12b0 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -18,6 +18,15 @@ config SECURITY_DMESG_RESTRICT If you are unsure how to answer this question, answer N. +config SECURITY_PERF_EVENTS_RESTRICT + bool "Restrict unprivileged use of performance events" + depends on PERF_EVENTS + help + If you say Y here, the kernel.perf_event_paranoid sysctl + will be set to 3 by default, and no unprivileged use of the + perf_event_open syscall will be permitted unless it is + changed. + config SECURITY bool "Enable different security models" depends on SYSFS -- GitLab From 49029705bca2c24c43e4141a8ca1f594e24d185e Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 3 Jun 2016 14:06:14 -0700 Subject: [PATCH 1015/1442] ANDROID: dm verity fec: limit error correction recursion If verity tree itself is sufficiently corrupted in addition to data blocks, it's possible for error correction to end up in a deep recursive error correction loop that eventually causes a kernel panic as follows: [ 14.728962] [] verity_fec_decode+0xa8/0x138 [ 14.734691] [] verity_verify_level+0x11c/0x180 [ 14.740681] [] verity_hash_for_block+0x88/0xe0 [ 14.746671] [] fec_decode_rsb+0x318/0x75c [ 14.752226] [] verity_fec_decode+0xa8/0x138 [ 14.757956] [] verity_verify_level+0x11c/0x180 [ 14.763944] [] verity_hash_for_block+0x88/0xe0 This change limits the recursion to a reasonable level during a single I/O operation. Bug: 28943429 Signed-off-by: Sami Tolvanen Change-Id: I0a7ebff331d259c59a5e03c81918cc1613c3a766 (cherry picked from commit f4b9e40597e73942d2286a73463c55f26f61bfa7) --- drivers/md/dm-verity-fec.c | 11 ++++++++++- drivers/md/dm-verity-fec.h | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 0e79f38cb647..b1ddd1597474 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -442,6 +442,13 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, if (!verity_fec_is_enabled(v)) return -EOPNOTSUPP; + if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) { + DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name); + return -EIO; + } + + fio->level++; + if (type == DM_VERITY_BLOCK_TYPE_METADATA) block += v->data_blocks; @@ -473,7 +480,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, if (r < 0) { r = fec_decode_rsb(v, io, fio, rsb, offset, true); if (r < 0) - return r; + goto done; } if (dest) @@ -483,6 +490,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, r = verity_for_bv_block(v, io, iter, fec_bv_copy); } +done: + fio->level--; return r; } diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index a22098db08fd..4db0cae262eb 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -29,6 +29,9 @@ #define DM_VERITY_FEC_BUF_MAX \ (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS)) +/* maximum recursion level for verity_fec_decode */ +#define DM_VERITY_FEC_MAX_RECURSION 4 + #define DM_VERITY_OPT_FEC_DEV "use_fec_from_device" #define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks" #define DM_VERITY_OPT_FEC_START "fec_start" @@ -62,6 +65,7 @@ struct dm_verity_fec_io { unsigned nbufs; /* number of buffers allocated */ u8 *output; /* buffer for corrected output */ size_t output_pos; + unsigned level; /* recursion level */ }; #ifdef CONFIG_DM_VERITY_FEC -- GitLab From 3278f53e465871b668d62c932075544c8dab7c76 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 3 Jun 2016 14:22:46 -0700 Subject: [PATCH 1016/1442] ANDROID: dm verity fec: add missing release from fec_ktype Add a release function to allow destroying the dm-verity device. Bug: 27928374 Signed-off-by: Sami Tolvanen Change-Id: Ic0f7c17e4889c5580d70b52d9a709a37165a5747 (cherry picked from commit 0039ccf47c8f99888f7b71b2a36a68a027fbe357) --- drivers/md/dm-verity-fec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index b1ddd1597474..0d7669cbdd2c 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -687,7 +687,8 @@ static struct attribute *fec_attrs[] = { static struct kobj_type fec_ktype = { .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = fec_attrs + .default_attrs = fec_attrs, + .release = dm_kobject_release }; /* -- GitLab From 9259a5c52888ab7731334bbc84db7538650128f9 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 17 Jun 2016 11:31:17 -0700 Subject: [PATCH 1017/1442] ANDROID: dm verity fec: initialize recursion level Explicitly initialize recursion level to zero at the beginning of each I/O operation. Bug: 28943429 Change-Id: I00c612be2b8c22dd5afb65a739551df91cb324fc Signed-off-by: Sami Tolvanen (cherry picked from commit 32ffb3a22d7fd269b2961323478ece92c06a8334) --- drivers/md/dm-verity-fec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 0d7669cbdd2c..a8d4d2fd88e7 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -532,6 +532,7 @@ void verity_fec_init_io(struct dm_verity_io *io) memset(fio->bufs, 0, sizeof(fio->bufs)); fio->nbufs = 0; fio->output = NULL; + fio->level = 0; } /* -- GitLab From 49d0e06a253997f3d5e9cea628c71622be6a84b5 Mon Sep 17 00:00:00 2001 From: Thierry Strudel Date: Tue, 14 Jun 2016 17:46:44 -0700 Subject: [PATCH 1018/1442] ANDROID: cpu: send KOBJ_ONLINE event when enabling cpus In case some sysfs nodes needs to be labeled with a different label than sysfs then user needs to be notified when a core is brought back online. Signed-off-by: Thierry Strudel Bug: 29359497 Change-Id: I0395c86e01cd49c348fda8f93087d26f88557c91 --- kernel/cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index 4619cc105170..19444fca0da9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1154,6 +1154,7 @@ void __weak arch_enable_nonboot_cpus_end(void) void enable_nonboot_cpus(void) { int cpu, error; + struct device *cpu_device; /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); @@ -1171,6 +1172,12 @@ void enable_nonboot_cpus(void) trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { pr_info("CPU%d is up\n", cpu); + cpu_device = get_cpu_device(cpu); + if (!cpu_device) + pr_err("%s: failed to get cpu%d device\n", + __func__, cpu); + else + kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE); continue; } pr_warn("Error taking CPU%d up: %d\n", cpu, error); -- GitLab From 60d4c172c5cde891d3001cb9b7e1a0caeef96988 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 22 Jun 2016 16:49:48 +0800 Subject: [PATCH 1019/1442] ANDROID: netfilter: xt_quota2: make quota2_log work well In upstream commit 7200135bc1e61f1437dc326ae2ef2f310c50b4eb (netfilter: kill ulog targets) http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7200135bc1e6 ipt_ULOG target was removed, meanwhile, the IP_NF_TARGET_ULOG Kconfig and ipt_ULOG.h header file were removed too. This causes we cannot enable QUOTA2_LOG, and netd complains this error: "Unable to open quota socket". So when we reach the quota2 limit, userspace will not be notified with this event. Since IP_NF_TARGET_ULOG was removed, we need not depend on "IP_NF_TARGET_ULOG=n", and for compatibility, add ulog_packet_msg_t related definitions copied from "ipt_ULOG.h". Change-Id: I38132efaabf52bea75dfd736ce734a1b9690e87e Reported-by: Samboo Shen Signed-off-by: Liping Zhang --- net/netfilter/Kconfig | 1 - net/netfilter/xt_quota2.c | 21 ++++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 99734fed536f..177d3ae5fda8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1392,7 +1392,6 @@ config NETFILTER_XT_MATCH_QUOTA2 config NETFILTER_XT_MATCH_QUOTA2_LOG bool '"quota2" Netfilter LOG support' depends on NETFILTER_XT_MATCH_QUOTA2 - depends on IP_NF_TARGET_ULOG=n # not yes, not module, just no default n help This option allows `quota2' to log ONCE when a quota limit diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c index 99592ae56d9b..834594aa0085 100644 --- a/net/netfilter/xt_quota2.c +++ b/net/netfilter/xt_quota2.c @@ -21,8 +21,27 @@ #include #include + #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG -#include +/* For compatibility, these definitions are copied from the + * deprecated header file */ +#define ULOG_MAC_LEN 80 +#define ULOG_PREFIX_LEN 32 + +/* Format of the ULOG packets passed through netlink */ +typedef struct ulog_packet_msg { + unsigned long mark; + long timestamp_sec; + long timestamp_usec; + unsigned int hook; + char indev_name[IFNAMSIZ]; + char outdev_name[IFNAMSIZ]; + size_t data_len; + char prefix[ULOG_PREFIX_LEN]; + unsigned char mac_len; + unsigned char mac[ULOG_MAC_LEN]; + unsigned char payload[0]; +} ulog_packet_msg_t; #endif /** -- GitLab From 0de3b4c1a9f049e4d8ec9f39a6c5e44b623b24ae Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 8 Jul 2016 14:15:14 -0700 Subject: [PATCH 1020/1442] ANDROID: sdcardfs: Truncate packages_gid.list on overflow packages_gid.list was improperly returning the wrong count. Use scnprintf instead, and inform the user that the list was truncated if it is. Bug: 30013843 Change-Id: Ida2b2ef7cd86dd87300bfb4c2cdb6bfe2ee1650d Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/packagelist.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index 10f0d6be718b..9c3340528eee 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -335,13 +335,20 @@ static ssize_t packages_attr_show(struct config_item *item, struct hashtable_entry *hash_cur; struct hlist_node *h_t; int i; - int count = 0; + int count = 0, written = 0; + char errormsg[] = "\n"; + mutex_lock(&pkgl_data_all->hashtable_lock); - hash_for_each_safe(pkgl_data_all->package_to_appid, i, h_t, hash_cur, hlist) - count += snprintf(page + count, PAGE_SIZE - count, "%s %d\n", (char *)hash_cur->key, hash_cur->value); + hash_for_each_safe(pkgl_data_all->package_to_appid, i, h_t, hash_cur, hlist) { + written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n", (char *)hash_cur->key, hash_cur->value); + if (count + written == PAGE_SIZE - sizeof(errormsg)) { + count += scnprintf(page + count, PAGE_SIZE - count, errormsg); + break; + } + count += written; + } mutex_unlock(&pkgl_data_all->hashtable_lock); - return count; } -- GitLab From 774e4414ca9ff2bfd373f72849237e30f874fc65 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 1 Jun 2016 10:28:49 -0700 Subject: [PATCH 1021/1442] ANDROID: sdcardfs: fix itnull.cocci warnings List_for_each_entry has the property that the first argument is always bound to a real list element, never NULL, so testing dentry is not needed. Generated by: scripts/coccinelle/iterators/itnull.cocci Cc: Daniel Rosenberg Signed-off-by: Julia Lawall Signed-off-by: Fengguang Wu Signed-off-by: Guenter Roeck --- fs/sdcardfs/derived_perm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 128b3e56851f..41e0e11b3c35 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -112,7 +112,7 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry) void get_derive_permissions_recursive(struct dentry *parent) { struct dentry *dentry; list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (dentry && dentry->d_inode) { + if (dentry->d_inode) { mutex_lock(&dentry->d_inode->i_mutex); get_derived_permission(parent, dentry); fix_derived_permission(dentry->d_inode); -- GitLab From a0a752add9b539b95fa9a79d6f77fe1938a50ce2 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Sun, 31 Jul 2016 22:30:14 -0400 Subject: [PATCH 1022/1442] ANDROID: usb: gadget: f_accessory: remove duplicate endpoint alloc usb_ep_autoconfig is called twice for allocating bulk out endpoint. Removed the unwanted call. Fixes Issue: 67180 Change-Id: I03e87a86fbbbc85831ff7f0496adf038d1de2956 Signed-off-by: Anson Jacob --- drivers/usb/gadget/function/f_accessory.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index c62123560143..2ca16a577542 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -531,15 +531,6 @@ static int create_bulk_endpoints(struct acc_dev *dev, ep->driver_data = dev; /* claim the endpoint */ dev->ep_out = ep; - ep = usb_ep_autoconfig(cdev->gadget, out_desc); - if (!ep) { - DBG(cdev, "usb_ep_autoconfig for ep_out failed\n"); - return -ENODEV; - } - DBG(cdev, "usb_ep_autoconfig for ep_out got %s\n", ep->name); - ep->driver_data = dev; /* claim the endpoint */ - dev->ep_out = ep; - /* now allocate requests for our endpoints */ for (i = 0; i < TX_REQ_MAX; i++) { req = acc_request_new(dev->ep_in, BULK_BUFFER_SIZE); -- GitLab From a058da83727d9f3df84c956d9b29d775a2a9d45f Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Wed, 9 Jun 2010 17:47:38 -0500 Subject: [PATCH 1023/1442] CHROMIUM: dm: boot time specification of dm= This is a wrap-up of three patches pending upstream approval. I'm bundling them because they are interdependent, and it'll be easier to drop it on rebase later. 1. dm: allow a dm-fs-style device to be shared via dm-ioctl Integrates feedback from Alisdair, Mike, and Kiyoshi. Two main changes occur here: - One function is added which allows for a programmatically created mapped device to be inserted into the dm-ioctl hash table. This binds the device to a name and, optional, uuid which is needed by udev and allows for userspace management of the mapped device. - dm_table_complete() was extended to handle all of the final functional changes required for the table to be operational once called. 2. init: boot to device-mapper targets without an initr* Add a dm= kernel parameter modeled after the md= parameter from do_mounts_md. It allows for device-mapper targets to be configured at boot time for use early in the boot process (as the root device or otherwise). It also replaces /dev/XXX calls with major:minor opportunistically. The format is dm="name uuid ro,table line 1,table line 2,...". The parser expects the comma to be safe to use as a newline substitute but, otherwise, uses the normal separator of space. Some attempt has been made to make it forgiving of additional spaces (using skip_spaces()). A mapped device created during boot will be assigned a minor of 0 and may be access via /dev/dm-0. An example dm-linear root with no uuid may look like: root=/dev/dm-0 dm="lroot none ro, 0 4096 linear /dev/ubdb 0, 4096 4096 linear /dv/ubdc 0" Once udev is started, /dev/dm-0 will become /dev/mapper/lroot. Older upstream threads: http://marc.info/?l=dm-devel&m=127429492521964&w=2 http://marc.info/?l=dm-devel&m=127429499422096&w=2 http://marc.info/?l=dm-devel&m=127429493922000&w=2 Latest upstream threads: https://patchwork.kernel.org/patch/104859/ https://patchwork.kernel.org/patch/104860/ https://patchwork.kernel.org/patch/104861/ Bug: 27175947 Signed-off-by: Will Drewry Review URL: http://codereview.chromium.org/2020011 Change-Id: I92bd53432a11241228d2e5ac89a3b20d19b05a31 --- Documentation/device-mapper/boot.txt | 42 +++ Documentation/kernel-parameters.txt | 6 + drivers/md/dm-ioctl.c | 39 +++ drivers/md/dm-table.c | 1 + include/linux/device-mapper.h | 6 + init/Makefile | 1 + init/do_mounts.c | 1 + init/do_mounts.h | 10 + init/do_mounts_dm.c | 410 +++++++++++++++++++++++++++ 9 files changed, 516 insertions(+) create mode 100644 Documentation/device-mapper/boot.txt create mode 100644 init/do_mounts_dm.c diff --git a/Documentation/device-mapper/boot.txt b/Documentation/device-mapper/boot.txt new file mode 100644 index 000000000000..adcaad5e5e32 --- /dev/null +++ b/Documentation/device-mapper/boot.txt @@ -0,0 +1,42 @@ +Boot time creation of mapped devices +=================================== + +It is possible to configure a device mapper device to act as the root +device for your system in two ways. + +The first is to build an initial ramdisk which boots to a minimal +userspace which configures the device, then pivot_root(8) in to it. + +For simple device mapper configurations, it is possible to boot directly +using the following kernel command line: + +dm=" ,table line 1,...,table line n" + +name = the name to associate with the device + after boot, udev, if used, will use that name to label + the device node. +uuid = may be 'none' or the UUID desired for the device. +ro = may be "ro" or "rw". If "ro", the device and device table will be + marked read-only. + +Each table line may be as normal when using the dmsetup tool except for +two variations: +1. Any use of commas will be interpreted as a newline +2. Quotation marks cannot be escaped and cannot be used without + terminating the dm= argument. + +Unless renamed by udev, the device node created will be dm-0 as the +first minor number for the device-mapper is used during early creation. + +Example +======= + +- Booting to a linear array made up of user-mode linux block devices: + + dm="lroot none 0, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" \ + root=/dev/dm-0 + +Will boot to a rw dm-linear target of 8192 sectors split across two +block devices identified by their major:minor numbers. After boot, udev +will rename this target to /dev/mapper/lroot (depending on the rules). +No uuid was assigned. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 37babf91f2cb..e24b28ee3b39 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -87,6 +87,7 @@ parameter is applicable: BLACKFIN Blackfin architecture is enabled. CLK Common clock infrastructure is enabled. CMA Contiguous Memory Area support is enabled. + DM Device mapper support is enabled. DRM Direct Rendering Management support is enabled. DYNAMIC_DEBUG Build in debug messages and enable them at runtime EDD BIOS Enhanced Disk Drive Services (EDD) is enabled @@ -1025,6 +1026,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. dis_ucode_ldr [X86] Disable the microcode loader. + dm= [DM] Allows early creation of a device-mapper device. + See Documentation/device-mapper/boot.txt. + + dmasound= [HW,OSS] Sound subsystem buff + dma_debug=off If the kernel is compiled with DMA_API_DEBUG support, this option disables the debugging code at boot. diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 966eb4b61aed..89ec6d26881b 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1927,6 +1927,45 @@ void dm_interface_exit(void) dm_hash_exit(); } + +/** + * dm_ioctl_export - Permanently export a mapped device via the ioctl interface + * @md: Pointer to mapped_device + * @name: Buffer (size DM_NAME_LEN) for name + * @uuid: Buffer (size DM_UUID_LEN) for uuid or NULL if not desired + */ +int dm_ioctl_export(struct mapped_device *md, const char *name, + const char *uuid) +{ + int r = 0; + struct hash_cell *hc; + + if (!md) { + r = -ENXIO; + goto out; + } + + /* The name and uuid can only be set once. */ + mutex_lock(&dm_hash_cells_mutex); + hc = dm_get_mdptr(md); + mutex_unlock(&dm_hash_cells_mutex); + if (hc) { + DMERR("%s: already exported", dm_device_name(md)); + r = -ENXIO; + goto out; + } + + r = dm_hash_insert(name, uuid, md); + if (r) { + DMERR("%s: could not bind to '%s'", dm_device_name(md), name); + goto out; + } + + /* Let udev know we've changed. */ + dm_kobject_uevent(md, KOBJ_CHANGE, dm_get_event_nr(md)); +out: + return r; +} /** * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers * @md: Pointer to mapped_device diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c4b53b332607..399bcacb6d7c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ef7962e84444..0e1e050bc574 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -407,6 +407,12 @@ void dm_put(struct mapped_device *md); void dm_set_mdptr(struct mapped_device *md, void *ptr); void *dm_get_mdptr(struct mapped_device *md); +/* + * Export the device via the ioctl interface (uses mdptr). + */ +int dm_ioctl_export(struct mapped_device *md, const char *name, + const char *uuid); + /* * A device can still be used while suspended, but I/O is deferred. */ diff --git a/init/Makefile b/init/Makefile index de8e0aa42139..d210b235c5d7 100644 --- a/init/Makefile +++ b/init/Makefile @@ -17,6 +17,7 @@ mounts-y := do_mounts.o mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o +mounts-$(CONFIG_BLK_DEV_DM) += do_mounts_dm.o # dependencies on generated files need to be listed explicitly $(obj)/version.o: include/generated/compile.h diff --git a/init/do_mounts.c b/init/do_mounts.c index dea5de95c2dd..1902a1c80831 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -566,6 +566,7 @@ void __init prepare_namespace(void) wait_for_device_probe(); md_run_setup(); + dm_run_setup(); if (saved_root_name[0]) { root_device_name = saved_root_name; diff --git a/init/do_mounts.h b/init/do_mounts.h index 067af1d9e8b6..ecb275782c03 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -74,3 +74,13 @@ void md_run_setup(void); static inline void md_run_setup(void) {} #endif + +#ifdef CONFIG_BLK_DEV_DM + +void dm_run_setup(void); + +#else + +static inline void dm_run_setup(void) {} + +#endif diff --git a/init/do_mounts_dm.c b/init/do_mounts_dm.c new file mode 100644 index 000000000000..0fd3411533f3 --- /dev/null +++ b/init/do_mounts_dm.c @@ -0,0 +1,410 @@ +/* do_mounts_dm.c + * Copyright (C) 2010 The Chromium OS Authors + * All Rights Reserved. + * Based on do_mounts_md.c + * + * This file is released under the GPL. + */ +#include +#include +#include + +#include "do_mounts.h" + +#define DM_MAX_NAME 32 +#define DM_MAX_UUID 129 +#define DM_NO_UUID "none" + +#define DM_MSG_PREFIX "init" + +/* Separators used for parsing the dm= argument. */ +#define DM_FIELD_SEP ' ' +#define DM_LINE_SEP ',' + +/* + * When the device-mapper and any targets are compiled into the kernel + * (not a module), one target may be created and used as the root device at + * boot time with the parameters given with the boot line dm=... + * The code for that is here. + */ + +struct dm_setup_target { + sector_t begin; + sector_t length; + char *type; + char *params; + /* simple singly linked list */ + struct dm_setup_target *next; +}; + +static struct { + int minor; + int ro; + char name[DM_MAX_NAME]; + char uuid[DM_MAX_UUID]; + char *targets; + struct dm_setup_target *target; + int target_count; +} dm_setup_args __initdata; + +static __initdata int dm_early_setup; + +static size_t __init get_dm_option(char *str, char **next, char sep) +{ + size_t len = 0; + char *endp = NULL; + + if (!str) + return 0; + + endp = strchr(str, sep); + if (!endp) { /* act like strchrnul */ + len = strlen(str); + endp = str + len; + } else { + len = endp - str; + } + + if (endp == str) + return 0; + + if (!next) + return len; + + if (*endp == 0) { + /* Don't advance past the nul. */ + *next = endp; + } else { + *next = endp + 1; + } + return len; +} + +static int __init dm_setup_args_init(void) +{ + dm_setup_args.minor = 0; + dm_setup_args.ro = 0; + dm_setup_args.target = NULL; + dm_setup_args.target_count = 0; + return 0; +} + +static int __init dm_setup_cleanup(void) +{ + struct dm_setup_target *target = dm_setup_args.target; + struct dm_setup_target *old_target = NULL; + while (target) { + kfree(target->type); + kfree(target->params); + old_target = target; + target = target->next; + kfree(old_target); + dm_setup_args.target_count--; + } + BUG_ON(dm_setup_args.target_count); + return 0; +} + +static char * __init dm_setup_parse_device_args(char *str) +{ + char *next = NULL; + size_t len = 0; + + /* Grab the logical name of the device to be exported to udev */ + len = get_dm_option(str, &next, DM_FIELD_SEP); + if (!len) { + DMERR("failed to parse device name"); + goto parse_fail; + } + len = min(len + 1, sizeof(dm_setup_args.name)); + strlcpy(dm_setup_args.name, str, len); /* includes nul */ + str = skip_spaces(next); + + /* Grab the UUID value or "none" */ + len = get_dm_option(str, &next, DM_FIELD_SEP); + if (!len) { + DMERR("failed to parse device uuid"); + goto parse_fail; + } + len = min(len + 1, sizeof(dm_setup_args.uuid)); + strlcpy(dm_setup_args.uuid, str, len); + str = skip_spaces(next); + + /* Determine if the table/device will be read only or read-write */ + if (!strncmp("ro,", str, 3)) { + dm_setup_args.ro = 1; + } else if (!strncmp("rw,", str, 3)) { + dm_setup_args.ro = 0; + } else { + DMERR("failed to parse table mode"); + goto parse_fail; + } + str = skip_spaces(str + 3); + + return str; + +parse_fail: + return NULL; +} + +static void __init dm_substitute_devices(char *str, size_t str_len) +{ + char *candidate = str; + char *candidate_end = str; + char old_char; + size_t len = 0; + dev_t dev; + + if (str_len < 3) + return; + + while (str && *str) { + candidate = strchr(str, '/'); + if (!candidate) + break; + + /* Avoid embedded slashes */ + if (candidate != str && *(candidate - 1) != DM_FIELD_SEP) { + str = strchr(candidate, DM_FIELD_SEP); + continue; + } + + len = get_dm_option(candidate, &candidate_end, DM_FIELD_SEP); + str = skip_spaces(candidate_end); + if (len < 3 || len > 37) /* name_to_dev_t max; maj:mix min */ + continue; + + /* Temporarily terminate with a nul */ + candidate_end--; + old_char = *candidate_end; + *candidate_end = '\0'; + + DMDEBUG("converting candidate device '%s' to dev_t", candidate); + /* Use the boot-time specific device naming */ + dev = name_to_dev_t(candidate); + *candidate_end = old_char; + + DMDEBUG(" -> %u", dev); + /* No suitable replacement found */ + if (!dev) + continue; + + /* Rewrite the /dev/path as a major:minor */ + len = snprintf(candidate, len, "%u:%u", MAJOR(dev), MINOR(dev)); + if (!len) { + DMERR("error substituting device major/minor."); + break; + } + candidate += len; + /* Pad out with spaces (fixing our nul) */ + while (candidate < candidate_end) + *(candidate++) = DM_FIELD_SEP; + } +} + +static int __init dm_setup_parse_targets(char *str) +{ + char *next = NULL; + size_t len = 0; + struct dm_setup_target **target = NULL; + + /* Targets are defined as per the table format but with a + * comma as a newline separator. */ + target = &dm_setup_args.target; + while (str && *str) { + *target = kzalloc(sizeof(struct dm_setup_target), GFP_KERNEL); + if (!*target) { + DMERR("failed to allocate memory for target %d", + dm_setup_args.target_count); + goto parse_fail; + } + dm_setup_args.target_count++; + + (*target)->begin = simple_strtoull(str, &next, 10); + if (!next || *next != DM_FIELD_SEP) { + DMERR("failed to parse starting sector for target %d", + dm_setup_args.target_count - 1); + goto parse_fail; + } + str = skip_spaces(next + 1); + + (*target)->length = simple_strtoull(str, &next, 10); + if (!next || *next != DM_FIELD_SEP) { + DMERR("failed to parse length for target %d", + dm_setup_args.target_count - 1); + goto parse_fail; + } + str = skip_spaces(next + 1); + + len = get_dm_option(str, &next, DM_FIELD_SEP); + if (!len || + !((*target)->type = kstrndup(str, len, GFP_KERNEL))) { + DMERR("failed to parse type for target %d", + dm_setup_args.target_count - 1); + goto parse_fail; + } + str = skip_spaces(next); + + len = get_dm_option(str, &next, DM_LINE_SEP); + if (!len || + !((*target)->params = kstrndup(str, len, GFP_KERNEL))) { + DMERR("failed to parse params for target %d", + dm_setup_args.target_count - 1); + goto parse_fail; + } + str = skip_spaces(next); + + /* Before moving on, walk through the copied target and + * attempt to replace all /dev/xxx with the major:minor number. + * It may not be possible to resolve them traditionally at + * boot-time. */ + dm_substitute_devices((*target)->params, len); + + target = &((*target)->next); + } + DMDEBUG("parsed %d targets", dm_setup_args.target_count); + + return 0; + +parse_fail: + return 1; +} + +/* + * Parse the command-line parameters given our kernel, but do not + * actually try to invoke the DM device now; that is handled by + * dm_setup_drive after the low-level disk drivers have initialised. + * dm format is as follows: + * dm="name uuid fmode,[table line 1],[table line 2],..." + * May be used with root=/dev/dm-0 as it always uses the first dm minor. + */ + +static int __init dm_setup(char *str) +{ + dm_setup_args_init(); + + str = dm_setup_parse_device_args(str); + if (!str) { + DMDEBUG("str is NULL"); + goto parse_fail; + } + + /* Target parsing is delayed until we have dynamic memory */ + dm_setup_args.targets = str; + + printk(KERN_INFO "dm: will configure '%s' on dm-%d\n", + dm_setup_args.name, dm_setup_args.minor); + + dm_early_setup = 1; + return 1; + +parse_fail: + printk(KERN_WARNING "dm: Invalid arguments supplied to dm=.\n"); + return 0; +} + + +static void __init dm_setup_drive(void) +{ + struct mapped_device *md = NULL; + struct dm_table *table = NULL; + struct dm_setup_target *target; + char *uuid = dm_setup_args.uuid; + fmode_t fmode = FMODE_READ; + + /* Finish parsing the targets. */ + if (dm_setup_parse_targets(dm_setup_args.targets)) + goto parse_fail; + + if (dm_create(dm_setup_args.minor, &md)) { + DMDEBUG("failed to create the device"); + goto dm_create_fail; + } + DMDEBUG("created device '%s'", dm_device_name(md)); + + /* In addition to flagging the table below, the disk must be + * set explicitly ro/rw. */ + set_disk_ro(dm_disk(md), dm_setup_args.ro); + + if (!dm_setup_args.ro) + fmode |= FMODE_WRITE; + if (dm_table_create(&table, fmode, dm_setup_args.target_count, md)) { + DMDEBUG("failed to create the table"); + goto dm_table_create_fail; + } + + target = dm_setup_args.target; + while (target) { + DMINFO("adding target '%llu %llu %s %s'", + (unsigned long long) target->begin, + (unsigned long long) target->length, target->type, + target->params); + if (dm_table_add_target(table, target->type, target->begin, + target->length, target->params)) { + DMDEBUG("failed to add the target to the table"); + goto add_target_fail; + } + target = target->next; + } + + if (dm_table_complete(table)) { + DMDEBUG("failed to complete the table"); + goto table_complete_fail; + } + + /* Suspend the device so that we can bind it to the table. */ + if (dm_suspend(md, 0)) { + DMDEBUG("failed to suspend the device pre-bind"); + goto suspend_fail; + } + + /* Bind the table to the device. This is the only way to associate + * md->map with the table and set the disk capacity directly. */ + if (dm_swap_table(md, table)) { /* should return NULL. */ + DMDEBUG("failed to bind the device to the table"); + goto table_bind_fail; + } + + /* Finally, resume and the device should be ready. */ + if (dm_resume(md)) { + DMDEBUG("failed to resume the device"); + goto resume_fail; + } + + /* Export the dm device via the ioctl interface */ + if (!strcmp(DM_NO_UUID, dm_setup_args.uuid)) + uuid = NULL; + if (dm_ioctl_export(md, dm_setup_args.name, uuid)) { + DMDEBUG("failed to export device with given name and uuid"); + goto export_fail; + } + printk(KERN_INFO "dm: dm-%d is ready\n", dm_setup_args.minor); + + dm_setup_cleanup(); + return; + +export_fail: +resume_fail: +table_bind_fail: +suspend_fail: +table_complete_fail: +add_target_fail: + dm_table_put(table); +dm_table_create_fail: + dm_put(md); +dm_create_fail: + dm_setup_cleanup(); +parse_fail: + printk(KERN_WARNING "dm: starting dm-%d (%s) failed\n", + dm_setup_args.minor, dm_setup_args.name); +} + +__setup("dm=", dm_setup); + +void __init dm_run_setup(void) +{ + if (!dm_early_setup) + return; + printk(KERN_INFO "dm: attempting early device configuration.\n"); + dm_setup_drive(); +} -- GitLab From ae8b49031118932b376426907e5245c18e06112b Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 8 Feb 2016 16:47:41 -0800 Subject: [PATCH 1024/1442] ANDROID: dm: Rebase on top of 4.9 1. "dm: optimize use SRCU and RCU" removes the use of dm_table_put. 2. "dm: remove request-based logic from make_request_fn wrapper" necessitates calling dm_setup_md_queue or else the request_queue's make_request_fn pointer ends being unset. [ 7.711600] Internal error: Oops - bad mode: 0 [#1] PREEMPT SMP [ 7.717519] CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 4.1.15-02273-gb057d16-dirty #33 [ 7.726559] Hardware name: HiKey Development Board (DT) [ 7.731779] task: ffffffc005f8acc0 ti: ffffffc005f8c000 task.ti: ffffffc005f8c000 [ 7.739257] PC is at 0x0 [ 7.741787] LR is at generic_make_request+0x8c/0x108 .... [ 9.082931] Call trace: [ 9.085372] [< (null)>] (null) [ 9.090074] [] submit_bio+0x98/0x1e0 [ 9.095212] [] _submit_bh+0x120/0x1f0 [ 9.096165] cfg80211: Calling CRDA to update world regulatory domain [ 9.106781] [] __bread_gfp+0x94/0x114 [ 9.112004] [] ext4_fill_super+0x18c/0x2d64 [ 9.117750] [] mount_bdev+0x194/0x1c0 [ 9.122973] [] ext4_mount+0x14/0x1c [ 9.128021] [] mount_fs+0x3c/0x194 [ 9.132985] [] vfs_kern_mount+0x4c/0x134 [ 9.138467] [] do_mount+0x204/0xbbc [ 9.143514] [] SyS_mount+0x94/0xe8 [ 9.148479] [] mount_block_root+0x120/0x24c [ 9.154222] [] mount_root+0x110/0x12c [ 9.159443] [] prepare_namespace+0x170/0x1b8 [ 9.165273] [] kernel_init_freeable+0x23c/0x260 [ 9.171365] [] kernel_init+0x10/0x118 [ 9.176589] Code: bad PC value [ 9.179807] ---[ end trace 75e1bc52ba364d13 ]--- Bug: 27175947 Signed-off-by: Badhri Jagan Sridharan Change-Id: I952d86fd1475f0825f9be1386e3497b36127abd0 --- init/do_mounts_dm.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/init/do_mounts_dm.c b/init/do_mounts_dm.c index 0fd3411533f3..0fe9c5f7d5e9 100644 --- a/init/do_mounts_dm.c +++ b/init/do_mounts_dm.c @@ -10,6 +10,7 @@ #include #include "do_mounts.h" +#include "../drivers/md/dm.h" #define DM_MAX_NAME 32 #define DM_MAX_UUID 129 @@ -333,6 +334,7 @@ static void __init dm_setup_drive(void) goto dm_table_create_fail; } + dm_lock_md_type(md); target = dm_setup_args.target; while (target) { DMINFO("adding target '%llu %llu %s %s'", @@ -352,6 +354,17 @@ static void __init dm_setup_drive(void) goto table_complete_fail; } + if (dm_get_md_type(md) == DM_TYPE_NONE) { + dm_set_md_type(md, dm_table_get_type(table)); + if (dm_setup_md_queue(md, table)) { + DMWARN("unable to set up device queue for new table."); + goto setup_md_queue_fail; + } + } else if (dm_get_md_type(md) != dm_table_get_type(table)) { + DMWARN("can't change device type after initial table load."); + goto setup_md_queue_fail; + } + /* Suspend the device so that we can bind it to the table. */ if (dm_suspend(md, 0)) { DMDEBUG("failed to suspend the device pre-bind"); @@ -380,6 +393,7 @@ static void __init dm_setup_drive(void) } printk(KERN_INFO "dm: dm-%d is ready\n", dm_setup_args.minor); + dm_unlock_md_type(md); dm_setup_cleanup(); return; @@ -387,9 +401,10 @@ static void __init dm_setup_drive(void) resume_fail: table_bind_fail: suspend_fail: +setup_md_queue_fail: table_complete_fail: add_target_fail: - dm_table_put(table); + dm_unlock_md_type(md); dm_table_create_fail: dm_put(md); dm_create_fail: -- GitLab From ba2055cde18724292cc8013dd1e7db199b59771d Mon Sep 17 00:00:00 2001 From: Jeremy Compostella Date: Mon, 2 May 2016 17:29:28 +0200 Subject: [PATCH 1025/1442] ANDROID: dm: fix dm_substitute_devices() When candidate is the last parameter, candidate_end points to the '\0' character and not the DM_FIELD_SEP character. In such a situation, we should not move the candidate_end pointer one character backward. Signed-off-by: Jeremy Compostella --- init/do_mounts_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/init/do_mounts_dm.c b/init/do_mounts_dm.c index 0fe9c5f7d5e9..a557c5ee00a7 100644 --- a/init/do_mounts_dm.c +++ b/init/do_mounts_dm.c @@ -176,7 +176,8 @@ static void __init dm_substitute_devices(char *str, size_t str_len) continue; /* Temporarily terminate with a nul */ - candidate_end--; + if (*candidate_end) + candidate_end--; old_char = *candidate_end; *candidate_end = '\0'; -- GitLab From 8bb45a5c2379af13b5361b08bb49512c51ab5b00 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 14 Dec 2015 20:09:39 -0800 Subject: [PATCH 1026/1442] ANDROID: dm: Add android verity target This device-mapper target is virtually a VERITY target. This target is setup by reading the metadata contents piggybacked to the actual data blocks in the block device. The signature of the metadata contents are verified against the key included in the system keyring. Upon success, the underlying verity target is setup. BUG: 27175947 Change-Id: I7e99644a0960ac8279f02c0158ed20999510ea97 Signed-off-by: Badhri Jagan Sridharan --- drivers/md/Kconfig | 16 + drivers/md/Makefile | 4 + drivers/md/dm-android-verity.c | 771 +++++++++++++++++++++++++++++++++ drivers/md/dm-android-verity.h | 92 ++++ drivers/md/dm-verity-target.c | 12 +- drivers/md/dm-verity.h | 12 + 6 files changed, 901 insertions(+), 6 deletions(-) create mode 100644 drivers/md/dm-android-verity.c create mode 100644 drivers/md/dm-android-verity.h diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 02a5345a44a6..8f85df687f95 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -500,4 +500,20 @@ config DM_LOG_WRITES If unsure, say N. +config DM_ANDROID_VERITY + bool "Android verity target support" + depends on DM_VERITY + depends on X509_CERTIFICATE_PARSER + depends on SYSTEM_TRUSTED_KEYRING + depends on PUBLIC_KEY_ALGO_RSA + depends on KEYS + depends on ASYMMETRIC_KEY_TYPE + depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE + ---help--- + This device-mapper target is virtually a VERITY target. This + target is setup by reading the metadata contents piggybacked + to the actual data blocks in the block device. The signature + of the metadata contents are verified against the key included + in the system keyring. Upon success, the underlying verity + target is setup. endif # MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 3cbda1af87a0..fa5941fc4c76 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -67,3 +67,7 @@ endif ifeq ($(CONFIG_DM_VERITY_FEC),y) dm-verity-objs += dm-verity-fec.o endif + +ifeq ($(CONFIG_DM_ANDROID_VERITY),y) +dm-verity-objs += dm-android-verity.o +endif diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c new file mode 100644 index 000000000000..c77c9fa7a962 --- /dev/null +++ b/drivers/md/dm-android-verity.c @@ -0,0 +1,771 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "dm-verity.h" +#include "dm-android-verity.h" + +static char verifiedbootstate[VERITY_COMMANDLINE_PARAM_LENGTH]; +static char veritymode[VERITY_COMMANDLINE_PARAM_LENGTH]; + +static int __init verified_boot_state_param(char *line) +{ + strlcpy(verifiedbootstate, line, sizeof(verifiedbootstate)); + return 1; +} + +__setup("androidboot.verifiedbootstate=", verified_boot_state_param); + +static int __init verity_mode_param(char *line) +{ + strlcpy(veritymode, line, sizeof(veritymode)); + return 1; +} + +__setup("androidboot.veritymode=", verity_mode_param); + +static int table_extract_mpi_array(struct public_key_signature *pks, + const void *data, size_t len) +{ + MPI mpi = mpi_read_raw_data(data, len); + + if (!mpi) { + DMERR("Error while allocating mpi array"); + return -ENOMEM; + } + + pks->mpi[0] = mpi; + pks->nr_mpi = 1; + return 0; +} + +static struct public_key_signature *table_make_digest( + enum pkey_hash_algo hash, + const void *table, + unsigned long table_len) +{ + struct public_key_signature *pks = NULL; + struct crypto_shash *tfm; + struct shash_desc *desc; + size_t digest_size, desc_size; + int ret; + + /* Allocate the hashing algorithm we're going to need and find out how + * big the hash operational data will be. + */ + tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + + desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); + digest_size = crypto_shash_digestsize(tfm); + + /* We allocate the hash operational data storage on the end of out + * context data and the digest output buffer on the end of that. + */ + ret = -ENOMEM; + pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL); + if (!pks) + goto error; + + pks->pkey_hash_algo = hash; + pks->digest = (u8 *)pks + sizeof(*pks) + desc_size; + pks->digest_size = digest_size; + + desc = (struct shash_desc *)(pks + 1); + desc->tfm = tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + ret = crypto_shash_init(desc); + if (ret < 0) + goto error; + + ret = crypto_shash_finup(desc, table, table_len, pks->digest); + if (ret < 0) + goto error; + + crypto_free_shash(tfm); + return pks; + +error: + kfree(pks); + crypto_free_shash(tfm); + return ERR_PTR(ret); +} + +static int read_block_dev(struct bio_read *payload, struct block_device *bdev, + sector_t offset, int length) +{ + struct bio *bio; + int err = 0, i; + + payload->number_of_pages = DIV_ROUND_UP(length, PAGE_SIZE); + + bio = bio_alloc(GFP_KERNEL, payload->number_of_pages); + if (!bio) { + DMERR("Error while allocating bio"); + return -ENOMEM; + } + + bio->bi_bdev = bdev; + bio->bi_sector = offset; + + payload->page_io = kzalloc(sizeof(struct page *) * + payload->number_of_pages, GFP_KERNEL); + if (!payload->page_io) { + DMERR("page_io array alloc failed"); + err = -ENOMEM; + goto free_bio; + } + + for (i = 0; i < payload->number_of_pages; i++) { + payload->page_io[i] = alloc_page(GFP_KERNEL); + if (!payload->page_io[i]) { + DMERR("alloc_page failed"); + err = -ENOMEM; + goto free_pages; + } + if (!bio_add_page(bio, payload->page_io[i], PAGE_SIZE, 0)) { + DMERR("bio_add_page error"); + err = -EIO; + goto free_pages; + } + } + + if (!submit_bio_wait(READ, bio)) + /* success */ + goto free_bio; + DMERR("bio read failed"); + err = -EIO; + +free_pages: + for (i = 0; i < payload->number_of_pages; i++) + if (payload->page_io[i]) + __free_page(payload->page_io[i]); + kfree(payload->page_io); +free_bio: + bio_put(bio); + return err; +} + +static inline u64 fec_div_round_up(u64 x, u64 y) +{ + u64 remainder; + + return div64_u64_rem(x, y, &remainder) + + (remainder > 0 ? 1 : 0); +} + +static inline void populate_fec_metadata(struct fec_header *header, + struct fec_ecc_metadata *ecc) +{ + ecc->blocks = fec_div_round_up(le64_to_cpu(header->inp_size), + FEC_BLOCK_SIZE); + ecc->roots = le32_to_cpu(header->roots); + ecc->start = le64_to_cpu(header->inp_size); +} + +static inline int validate_fec_header(struct fec_header *header, u64 offset) +{ + /* move offset to make the sanity check work for backup header + * as well. */ + offset -= offset % FEC_BLOCK_SIZE; + if (le32_to_cpu(header->magic) != FEC_MAGIC || + le32_to_cpu(header->version) != FEC_VERSION || + le32_to_cpu(header->size) != sizeof(struct fec_header) || + le32_to_cpu(header->roots) == 0 || + le32_to_cpu(header->roots) >= FEC_RSM || + offset < le32_to_cpu(header->fec_size) || + offset - le32_to_cpu(header->fec_size) != + le64_to_cpu(header->inp_size)) + return -EINVAL; + + return 0; +} + +static int extract_fec_header(dev_t dev, struct fec_header *fec, + struct fec_ecc_metadata *ecc) +{ + u64 device_size; + struct bio_read payload; + int i, err = 0; + struct block_device *bdev; + + bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); + + if (IS_ERR(bdev)) { + DMERR("bdev get error"); + return PTR_ERR(bdev); + } + + device_size = i_size_read(bdev->bd_inode); + + /* fec metadata size is a power of 2 and PAGE_SIZE + * is a power of 2 as well. + */ + BUG_ON(FEC_BLOCK_SIZE > PAGE_SIZE); + /* 512 byte sector alignment */ + BUG_ON(((device_size - FEC_BLOCK_SIZE) % (1 << SECTOR_SHIFT)) != 0); + + err = read_block_dev(&payload, bdev, (device_size - + FEC_BLOCK_SIZE) / (1 << SECTOR_SHIFT), FEC_BLOCK_SIZE); + if (err) { + DMERR("Error while reading verity metadata"); + goto error; + } + + BUG_ON(sizeof(struct fec_header) > PAGE_SIZE); + memcpy(fec, page_address(payload.page_io[0]), + sizeof(*fec)); + + ecc->valid = true; + if (validate_fec_header(fec, device_size - FEC_BLOCK_SIZE)) { + /* Try the backup header */ + memcpy(fec, page_address(payload.page_io[0]) + FEC_BLOCK_SIZE + - sizeof(*fec) , + sizeof(*fec)); + if (validate_fec_header(fec, device_size - + sizeof(struct fec_header))) + ecc->valid = false; + } + + if (ecc->valid) + populate_fec_metadata(fec, ecc); + + for (i = 0; i < payload.number_of_pages; i++) + __free_page(payload.page_io[i]); + kfree(payload.page_io); + +error: + blkdev_put(bdev, FMODE_READ); + return err; +} +static void find_metadata_offset(struct fec_header *fec, + struct block_device *bdev, u64 *metadata_offset) +{ + u64 device_size; + + device_size = i_size_read(bdev->bd_inode); + + if (le32_to_cpu(fec->magic) == FEC_MAGIC) + *metadata_offset = le64_to_cpu(fec->inp_size) - + VERITY_METADATA_SIZE; + else + *metadata_offset = device_size - VERITY_METADATA_SIZE; +} + +static struct android_metadata *extract_metadata(dev_t dev, + struct fec_header *fec) +{ + struct block_device *bdev; + struct android_metadata_header *header; + struct android_metadata *uninitialized_var(metadata); + int i; + u32 table_length, copy_length, offset; + u64 metadata_offset; + struct bio_read payload; + int err = 0; + + bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); + + if (IS_ERR(bdev)) { + DMERR("blkdev_get_by_dev failed"); + return ERR_CAST(bdev); + } + + find_metadata_offset(fec, bdev, &metadata_offset); + + /* Verity metadata size is a power of 2 and PAGE_SIZE + * is a power of 2 as well. + * PAGE_SIZE is also a multiple of 512 bytes. + */ + if (VERITY_METADATA_SIZE > PAGE_SIZE) + BUG_ON(VERITY_METADATA_SIZE % PAGE_SIZE != 0); + /* 512 byte sector alignment */ + BUG_ON(metadata_offset % (1 << SECTOR_SHIFT) != 0); + + err = read_block_dev(&payload, bdev, metadata_offset / + (1 << SECTOR_SHIFT), VERITY_METADATA_SIZE); + if (err) { + DMERR("Error while reading verity metadata"); + metadata = ERR_PTR(err); + goto blkdev_release; + } + + header = kzalloc(sizeof(*header), GFP_KERNEL); + if (!header) { + DMERR("kzalloc failed for header"); + err = -ENOMEM; + goto free_payload; + } + + memcpy(header, page_address(payload.page_io[0]), + sizeof(*header)); + + DMINFO("bio magic_number:%u protocol_version:%d table_length:%u", + le32_to_cpu(header->magic_number), + le32_to_cpu(header->protocol_version), + le32_to_cpu(header->table_length)); + + metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + if (!metadata) { + DMERR("kzalloc for metadata failed"); + err = -ENOMEM; + goto free_header; + } + + metadata->header = header; + table_length = le32_to_cpu(header->table_length); + + if (table_length == 0 || + table_length > (VERITY_METADATA_SIZE - + sizeof(struct android_metadata_header))) + goto free_metadata; + + metadata->verity_table = kzalloc(table_length + 1, GFP_KERNEL); + + if (!metadata->verity_table) { + DMERR("kzalloc verity_table failed"); + err = -ENOMEM; + goto free_metadata; + } + + if (sizeof(struct android_metadata_header) + + table_length <= PAGE_SIZE) { + memcpy(metadata->verity_table, page_address(payload.page_io[0]) + + sizeof(struct android_metadata_header), + table_length); + } else { + copy_length = PAGE_SIZE - + sizeof(struct android_metadata_header); + memcpy(metadata->verity_table, page_address(payload.page_io[0]) + + sizeof(struct android_metadata_header), + copy_length); + table_length -= copy_length; + offset = copy_length; + i = 1; + while (table_length != 0) { + if (table_length > PAGE_SIZE) { + memcpy(metadata->verity_table + offset, + page_address(payload.page_io[i]), + PAGE_SIZE); + offset += PAGE_SIZE; + table_length -= PAGE_SIZE; + } else { + memcpy(metadata->verity_table + offset, + page_address(payload.page_io[i]), + table_length); + table_length = 0; + } + i++; + } + } + metadata->verity_table[table_length] = '\0'; + + goto free_payload; + +free_metadata: + kfree(metadata); +free_header: + kfree(header); + metadata = ERR_PTR(err); +free_payload: + for (i = 0; i < payload.number_of_pages; i++) + if (payload.page_io[i]) + __free_page(payload.page_io[i]); + kfree(payload.page_io); + + DMINFO("verity_table: %s", metadata->verity_table); +blkdev_release: + blkdev_put(bdev, FMODE_READ); + return metadata; +} + +/* helper functions to extract properties from dts */ +const char *find_dt_value(const char *name) +{ + struct device_node *firmware; + const char *value; + + firmware = of_find_node_by_path("/firmware/android"); + if (!firmware) + return NULL; + value = of_get_property(firmware, name, NULL); + of_node_put(firmware); + + return value; +} + +static bool is_unlocked(void) +{ + static const char unlocked[] = "orange"; + static const char verified_boot_prop[] = "verifiedbootstate"; + const char *value; + + value = find_dt_value(verified_boot_prop); + if (!value) + value = verifiedbootstate; + + return !strncmp(value, unlocked, sizeof(unlocked) - 1); +} + +static int verity_mode(void) +{ + static const char enforcing[] = "enforcing"; + static const char verified_mode_prop[] = "veritymode"; + const char *value; + + value = find_dt_value(verified_mode_prop); + if (!value) + value = veritymode; + if (!strncmp(value, enforcing, sizeof(enforcing) - 1)) + return DM_VERITY_MODE_RESTART; + + return DM_VERITY_MODE_EIO; +} + +static int verify_header(struct android_metadata_header *header) +{ + int retval = -EINVAL; + + if (is_unlocked() && le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_DISABLE) { + retval = VERITY_STATE_DISABLE; + return retval; + } + + if (!(le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_NUMBER) || + (le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_DISABLE)) { + DMERR("Incorrect magic number"); + return retval; + } + + if (le32_to_cpu(header->protocol_version) != + VERITY_METADATA_VERSION) { + DMERR("Unsupported version %u", + le32_to_cpu(header->protocol_version)); + return retval; + } + + return 0; +} + +static int verify_verity_signature(char *key_id, + struct android_metadata *metadata) +{ + key_ref_t key_ref; + struct key *key; + struct public_key_signature *pks = NULL; + int retval = -EINVAL; + + key_ref = keyring_search(make_key_ref(system_trusted_keyring, 1), + &key_type_asymmetric, key_id); + + if (IS_ERR(key_ref)) { + DMERR("keyring: key not found"); + return -ENOKEY; + } + + key = key_ref_to_ptr(key_ref); + + pks = table_make_digest(PKEY_HASH_SHA256, + (const void *)metadata->verity_table, + le32_to_cpu(metadata->header->table_length)); + + if (IS_ERR(pks)) { + DMERR("hashing failed"); + goto error; + } + + retval = table_extract_mpi_array(pks, &metadata->header->signature[0], + RSANUMBYTES); + if (retval < 0) { + DMERR("Error extracting mpi %d", retval); + goto error; + } + + retval = verify_signature(key, pks); + mpi_free(pks->rsa.s); +error: + kfree(pks); + key_put(key); + + return retval; +} + +static void handle_error(void) +{ + int mode = verity_mode(); + if (mode == DM_VERITY_MODE_RESTART) { + DMERR("triggering restart"); + kernel_restart("dm-verity device corrupted"); + } else { + DMERR("Mounting verity root failed"); + } +} + +static inline bool test_mult_overflow(sector_t a, u32 b) +{ + sector_t r = (sector_t)~0ULL; + + sector_div(r, b); + return a > r; +} + +/* + * Target parameters: + * Key id of the public key in the system keyring. + * Verity metadata's signature would be verified against + * this. If the key id contains spaces, replace them + * with '#'. + * The block device for which dm-verity is being setup. + */ +static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + dev_t uninitialized_var(dev); + struct android_metadata *uninitialized_var(metadata); + int err = 0, i, mode; + char *key_id, *table_ptr, dummy, + *verity_table_args[VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS]; + /* One for specifying number of opt args and one for mode */ + sector_t data_sectors; + u32 data_block_size; + unsigned int major, minor, + no_of_args = VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS; + struct fec_header fec; + struct fec_ecc_metadata uninitialized_var(ecc); + char buf[FEC_ARG_LENGTH], *buf_ptr; + unsigned long long tmpll; + + if (argc != 2) { + DMERR("Incorrect number of arguments"); + handle_error(); + return -EINVAL; + } + + /* should come as one of the arguments for the verity target */ + key_id = argv[0]; + strreplace(argv[0], '#', ' '); + + if (sscanf(argv[1], "%u:%u%c", &major, &minor, &dummy) == 2) { + dev = MKDEV(major, minor); + if (MAJOR(dev) != major || MINOR(dev) != minor) { + DMERR("Incorrect bdev major minor number"); + handle_error(); + return -EOVERFLOW; + } + } + + DMINFO("key:%s dev:%s", argv[0], argv[1]); + + if (extract_fec_header(dev, &fec, &ecc)) { + DMERR("Error while extracting fec header"); + handle_error(); + return -EINVAL; + } + + metadata = extract_metadata(dev, &fec); + + if (IS_ERR(metadata)) { + DMERR("Error while extracting metadata"); + handle_error(); + return -EINVAL; + } + + err = verify_header(metadata->header); + + if (err == VERITY_STATE_DISABLE) { + DMERR("Mounting root with verity disabled"); + return -EINVAL; + } else if (err) { + DMERR("Verity header handle error"); + handle_error(); + goto free_metadata; + } + + err = verify_verity_signature(key_id, metadata); + + if (err) { + DMERR("Signature verification failed"); + handle_error(); + goto free_metadata; + } else + DMINFO("Signature verification success"); + + table_ptr = metadata->verity_table; + + for (i = 0; i < VERITY_TABLE_ARGS; i++) { + verity_table_args[i] = strsep(&table_ptr, " "); + if (verity_table_args[i] == NULL) + break; + } + + if (i != VERITY_TABLE_ARGS) { + DMERR("Verity table not in the expected format"); + err = -EINVAL; + handle_error(); + goto free_metadata; + } + + if (sscanf(verity_table_args[5], "%llu%c", &tmpll, &dummy) + != 1) { + DMERR("Verity table not in the expected format"); + handle_error(); + err = -EINVAL; + goto free_metadata; + } + + if (tmpll > ULONG_MAX) { + DMERR(" too large. Forgot to turn on CONFIG_LBDAF?"); + handle_error(); + err = -EINVAL; + goto free_metadata; + } + + data_sectors = tmpll; + + if (sscanf(verity_table_args[3], "%u%c", &data_block_size, &dummy) + != 1) { + DMERR("Verity table not in the expected format"); + handle_error(); + err = -EINVAL; + goto free_metadata; + } + + if (test_mult_overflow(data_sectors, data_block_size >> + SECTOR_SHIFT)) { + DMERR("data_sectors too large"); + handle_error(); + err = -EOVERFLOW; + goto free_metadata; + } + + data_sectors *= data_block_size >> SECTOR_SHIFT; + DMINFO("Data sectors %llu", (unsigned long long)data_sectors); + + /* update target length */ + ti->len = data_sectors; + + /*substitute data_dev and hash_dev*/ + verity_table_args[1] = argv[1]; + verity_table_args[2] = argv[1]; + + mode = verity_mode(); + + if (ecc.valid && IS_BUILTIN(CONFIG_DM_VERITY_FEC)) { + if (mode) { + err = snprintf(buf, FEC_ARG_LENGTH, + "%u %s " VERITY_TABLE_OPT_FEC_FORMAT, + 1 + VERITY_TABLE_OPT_FEC_ARGS, + mode == DM_VERITY_MODE_RESTART ? + VERITY_TABLE_OPT_RESTART : VERITY_TABLE_OPT_LOGGING, + argv[1], ecc.start / FEC_BLOCK_SIZE, ecc.blocks, + ecc.roots); + } else { + err = snprintf(buf, FEC_ARG_LENGTH, + "%u " VERITY_TABLE_OPT_FEC_FORMAT, + VERITY_TABLE_OPT_FEC_ARGS, argv[1], + ecc.start / FEC_BLOCK_SIZE, ecc.blocks, ecc.roots); + } + } else if (mode) { + err = snprintf(buf, FEC_ARG_LENGTH, + "2 " VERITY_TABLE_OPT_IGNZERO " %s", + mode == DM_VERITY_MODE_RESTART ? + VERITY_TABLE_OPT_RESTART : VERITY_TABLE_OPT_LOGGING); + } else { + err = snprintf(buf, FEC_ARG_LENGTH, "1 %s", + "ignore_zero_blocks"); + } + + if (err < 0 || err >= FEC_ARG_LENGTH) + goto free_metadata; + + buf_ptr = buf; + + for (i = VERITY_TABLE_ARGS; i < (VERITY_TABLE_ARGS + + VERITY_TABLE_OPT_FEC_ARGS + 2); i++) { + verity_table_args[i] = strsep(&buf_ptr, " "); + if (verity_table_args[i] == NULL) { + no_of_args = i; + break; + } + } + + err = verity_ctr(ti, no_of_args, verity_table_args); + +free_metadata: + kfree(metadata->header); + kfree(metadata->verity_table); + kfree(metadata); + return err; +} + +static struct target_type android_verity_target = { + .name = "android-verity", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = android_verity_ctr, + .dtr = verity_dtr, + .map = verity_map, + .status = verity_status, + .ioctl = verity_ioctl, + .merge = verity_merge, + .iterate_devices = verity_iterate_devices, + .io_hints = verity_io_hints, +}; + +static int __init dm_android_verity_init(void) +{ + int r; + + r = dm_register_target(&android_verity_target); + if (r < 0) + DMERR("register failed %d", r); + + return r; +} + +static void __exit dm_android_verity_exit(void) +{ + dm_unregister_target(&android_verity_target); +} + +module_init(dm_android_verity_init); +module_exit(dm_android_verity_exit); diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h new file mode 100644 index 000000000000..11477ffd2243 --- /dev/null +++ b/drivers/md/dm-android-verity.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef DM_ANDROID_VERITY_H +#define DM_ANDROID_VERITY_H + +#include + +#define RSANUMBYTES 256 +#define VERITY_METADATA_MAGIC_NUMBER 0xb001b001 +#define VERITY_METADATA_MAGIC_DISABLE 0x46464f56 +#define VERITY_METADATA_VERSION 0 +#define VERITY_STATE_DISABLE 1 +#define DATA_BLOCK_SIZE (4 * 1024) +#define VERITY_METADATA_SIZE (8 * DATA_BLOCK_SIZE) +#define VERITY_TABLE_ARGS 10 +#define VERITY_COMMANDLINE_PARAM_LENGTH 20 + +#define FEC_MAGIC 0xFECFECFE +#define FEC_BLOCK_SIZE (4 * 1024) +#define FEC_VERSION 0 +#define FEC_RSM 255 +#define FEC_ARG_LENGTH 300 + +#define VERITY_TABLE_OPT_RESTART "restart_on_corruption" +#define VERITY_TABLE_OPT_LOGGING "ignore_corruption" +#define VERITY_TABLE_OPT_IGNZERO "ignore_zero_blocks" + +#define VERITY_TABLE_OPT_FEC_FORMAT \ + "use_fec_from_device %s fec_start %llu fec_blocks %llu fec_roots %u ignore_zero_blocks" +#define VERITY_TABLE_OPT_FEC_ARGS 9 + +#define VERITY_DEBUG 0 + +#define DM_MSG_PREFIX "android-verity" +/* + * There can be two formats. + * if fec is present + * + * if fec is not present + * + */ +/* TODO: rearrange structure to reduce memory holes + * depends on userspace change. + */ +struct fec_header { + __le32 magic; + __le32 version; + __le32 size; + __le32 roots; + __le32 fec_size; + __le64 inp_size; + u8 hash[SHA256_DIGEST_SIZE]; +}; + +struct android_metadata_header { + __le32 magic_number; + __le32 protocol_version; + char signature[RSANUMBYTES]; + __le32 table_length; +}; + +struct android_metadata { + struct android_metadata_header *header; + char *verity_table; +}; + +struct fec_ecc_metadata { + bool valid; + u32 roots; + u64 blocks; + u64 rounds; + u64 start; +}; + +struct bio_read { + struct page **page_io; + int number_of_pages; +}; + +#endif /* DM_ANDROID_VERITY_H */ diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 0aba34a7b3b3..10362700a00c 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -551,7 +551,7 @@ static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io) * Bio map function. It allocates dm_verity_io structure and bio vector and * fills them. Then it issues prefetches and the I/O. */ -static int verity_map(struct dm_target *ti, struct bio *bio) +int verity_map(struct dm_target *ti, struct bio *bio) { struct dm_verity *v = ti->private; struct dm_verity_io *io; @@ -596,7 +596,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) /* * Status: V (valid) or C (corruption found) */ -static void verity_status(struct dm_target *ti, status_type_t type, +void verity_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; @@ -669,7 +669,7 @@ static int verity_prepare_ioctl(struct dm_target *ti, return 0; } -static int verity_iterate_devices(struct dm_target *ti, +int verity_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct dm_verity *v = ti->private; @@ -677,7 +677,7 @@ static int verity_iterate_devices(struct dm_target *ti, return fn(ti, v->data_dev, v->data_start, ti->len, data); } -static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) +void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct dm_verity *v = ti->private; @@ -690,7 +690,7 @@ static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, limits->logical_block_size); } -static void verity_dtr(struct dm_target *ti) +void verity_dtr(struct dm_target *ti) { struct dm_verity *v = ti->private; @@ -817,7 +817,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) * * Hex string or "-" if no salt. */ -static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) +int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) { struct dm_verity *v; struct dm_arg_set as; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index fb419f422d73..d9cf5e4939eb 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -126,4 +126,16 @@ extern int verity_hash(struct dm_verity *v, struct shash_desc *desc, extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, sector_t block, u8 *digest, bool *is_zero); +extern void verity_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen); +extern int verity_ioctl(struct dm_target *ti, unsigned cmd, + unsigned long arg); +extern int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size); +extern int verity_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data); +extern void verity_io_hints(struct dm_target *ti, struct queue_limits *limits); +extern void verity_dtr(struct dm_target *ti); +extern int verity_ctr(struct dm_target *ti, unsigned argc, char **argv); +extern int verity_map(struct dm_target *ti, struct bio *bio); #endif /* DM_VERITY_H */ -- GitLab From 56f6a6b2b1cddd93f0b5cea1699a4a661daaacc6 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 8 Feb 2016 16:28:43 -0800 Subject: [PATCH 1027/1442] ANDROID: dm-android-verity: Rebase on top of 4.1 Following CLs in upstream causes minor changes to dm-android-verity target. 1. keys: change asymmetric keys to use common hash definitions 2. block: Abstract out bvec iterator Rebase dm-android-verity on top of these changes. Bug: 27175947 Signed-off-by: Badhri Jagan Sridharan Change-Id: Icfdc3e7b3ead5de335a059cade1aca70414db415 --- drivers/md/dm-android-verity.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index c77c9fa7a962..aeb5045830d9 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -75,7 +75,7 @@ static int table_extract_mpi_array(struct public_key_signature *pks, } static struct public_key_signature *table_make_digest( - enum pkey_hash_algo hash, + enum hash_algo hash, const void *table, unsigned long table_len) { @@ -88,7 +88,7 @@ static struct public_key_signature *table_make_digest( /* Allocate the hashing algorithm we're going to need and find out how * big the hash operational data will be. */ - tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0); + tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0); if (IS_ERR(tfm)) return ERR_CAST(tfm); @@ -143,7 +143,7 @@ static int read_block_dev(struct bio_read *payload, struct block_device *bdev, } bio->bi_bdev = bdev; - bio->bi_sector = offset; + bio->bi_iter.bi_sector = offset; payload->page_io = kzalloc(sizeof(struct page *) * payload->number_of_pages, GFP_KERNEL); @@ -505,7 +505,7 @@ static int verify_verity_signature(char *key_id, key = key_ref_to_ptr(key_ref); - pks = table_make_digest(PKEY_HASH_SHA256, + pks = table_make_digest(HASH_ALGO_SHA256, (const void *)metadata->verity_table, le32_to_cpu(metadata->header->table_length)); @@ -569,7 +569,7 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) u32 data_block_size; unsigned int major, minor, no_of_args = VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS; - struct fec_header fec; + struct fec_header uninitialized_var(fec); struct fec_ecc_metadata uninitialized_var(ecc); char buf[FEC_ARG_LENGTH], *buf_ptr; unsigned long long tmpll; -- GitLab From d0706bcd6e746ccee1267879a22812842a0630b8 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 21 Mar 2016 10:55:23 -0700 Subject: [PATCH 1028/1442] ANDROID: dm: Mounting root as linear device when verity disabled This CL makes android-verity target to be added as linear dm device if when bootloader is unlocked and verity is disabled. Bug: 27175947 Change-Id: Ic41ca4b8908fb2777263799cf3a3e25934d70f18 Signed-off-by: Badhri Jagan Sridharan --- drivers/md/dm-android-verity.c | 131 +++++++++++++++++++++++++++------ drivers/md/dm-android-verity.h | 3 + drivers/md/dm-linear.c | 2 +- 3 files changed, 113 insertions(+), 23 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index aeb5045830d9..db4ddf789a39 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -43,6 +44,25 @@ static char verifiedbootstate[VERITY_COMMANDLINE_PARAM_LENGTH]; static char veritymode[VERITY_COMMANDLINE_PARAM_LENGTH]; +static bool target_added; +static bool verity_enabled = true; +struct dentry *debug_dir; +static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv); + +static struct target_type android_verity_target = { + .name = "android-verity", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = android_verity_ctr, + .dtr = verity_dtr, + .map = verity_map, + .status = verity_status, + .ioctl = verity_ioctl, + .merge = verity_merge, + .iterate_devices = verity_iterate_devices, + .io_hints = verity_io_hints, +}; + static int __init verified_boot_state_param(char *line) { strlcpy(verifiedbootstate, line, sizeof(verifiedbootstate)); @@ -549,6 +569,35 @@ static inline bool test_mult_overflow(sector_t a, u32 b) return a > r; } +static int add_as_linear_device(struct dm_target *ti, char *dev) +{ + /*Move to linear mapping defines*/ + char *linear_table_args[DM_LINEAR_ARGS]; + char offset[] = "0"; + int err = 0; + + linear_table_args[0] = dev; + linear_table_args[1] = offset; + + android_verity_target.dtr = linear_target.dtr, + android_verity_target.map = linear_target.map, + android_verity_target.status = linear_target.status, + android_verity_target.ioctl = linear_target.ioctl, + android_verity_target.merge = linear_target.merge, + android_verity_target.iterate_devices = linear_target.iterate_devices, + android_verity_target.io_hints = NULL; + + err = linear_target.ctr(ti, DM_LINEAR_ARGS, linear_table_args); + + if (!err) { + DMINFO("Added android-verity as a linear target"); + target_added = true; + } else + DMERR("Failed to add android-verity as linear target"); + + return err; +} + /* * Target parameters: * Key id of the public key in the system keyring. @@ -613,21 +662,27 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) if (err == VERITY_STATE_DISABLE) { DMERR("Mounting root with verity disabled"); - return -EINVAL; + verity_enabled = false; + /* we would still have to parse the args to figure out + * the data blocks size. Or may be could map the entire + * partition similar to mounting the device. + */ } else if (err) { DMERR("Verity header handle error"); handle_error(); goto free_metadata; } - err = verify_verity_signature(key_id, metadata); + if (!verity_enabled) { + err = verify_verity_signature(key_id, metadata); - if (err) { - DMERR("Signature verification failed"); - handle_error(); - goto free_metadata; - } else - DMINFO("Signature verification success"); + if (err) { + DMERR("Signature verification failed"); + handle_error(); + goto free_metadata; + } else + DMINFO("Signature verification success"); + } table_ptr = metadata->verity_table; @@ -683,6 +738,12 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) /* update target length */ ti->len = data_sectors; + /* Setup linear target and free */ + if (!verity_enabled) { + err = add_as_linear_device(ti, argv[1]); + goto free_metadata; + } + /*substitute data_dev and hash_dev*/ verity_table_args[1] = argv[1]; verity_table_args[2] = argv[1]; @@ -730,6 +791,13 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) err = verity_ctr(ti, no_of_args, verity_table_args); + if (err) + DMERR("android-verity failed to mount as verity target"); + else { + target_added = true; + DMINFO("android-verity mounted as verity target"); + } + free_metadata: kfree(metadata->header); kfree(metadata->verity_table); @@ -737,33 +805,52 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) return err; } -static struct target_type android_verity_target = { - .name = "android-verity", - .version = {1, 0, 0}, - .module = THIS_MODULE, - .ctr = android_verity_ctr, - .dtr = verity_dtr, - .map = verity_map, - .status = verity_status, - .ioctl = verity_ioctl, - .merge = verity_merge, - .iterate_devices = verity_iterate_devices, - .io_hints = verity_io_hints, -}; - static int __init dm_android_verity_init(void) { int r; + struct dentry *file; r = dm_register_target(&android_verity_target); if (r < 0) DMERR("register failed %d", r); + /* Tracks the status of the last added target */ + debug_dir = debugfs_create_dir("android_verity", NULL); + + if (IS_ERR_OR_NULL(debug_dir)) { + DMERR("Cannot create android_verity debugfs directory: %ld", + PTR_ERR(debug_dir)); + goto end; + } + + file = debugfs_create_bool("target_added", S_IRUGO, debug_dir, + (u32 *)&target_added); + + if (IS_ERR_OR_NULL(file)) { + DMERR("Cannot create android_verity debugfs directory: %ld", + PTR_ERR(debug_dir)); + debugfs_remove_recursive(debug_dir); + goto end; + } + + file = debugfs_create_bool("verity_enabled", S_IRUGO, debug_dir, + (u32 *)&verity_enabled); + + if (IS_ERR_OR_NULL(file)) { + DMERR("Cannot create android_verity debugfs directory: %ld", + PTR_ERR(debug_dir)); + debugfs_remove_recursive(debug_dir); + } + +end: return r; } static void __exit dm_android_verity_exit(void) { + if (!IS_ERR_OR_NULL(debug_dir)) + debugfs_remove_recursive(debug_dir); + dm_unregister_target(&android_verity_target); } diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index 11477ffd2243..2cf7de1b7910 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -44,6 +44,8 @@ #define VERITY_DEBUG 0 #define DM_MSG_PREFIX "android-verity" + +#define DM_LINEAR_ARGS 2 /* * There can be two formats. * if fec is present @@ -89,4 +91,5 @@ struct bio_read { int number_of_pages; }; +extern struct target_type linear_target; #endif /* DM_ANDROID_VERITY_H */ diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 4788b0b989a9..6376fd08d2e9 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -159,7 +159,7 @@ static long linear_direct_access(struct dm_target *ti, sector_t sector, return ret; } -static struct target_type linear_target = { +struct target_type linear_target = { .name = "linear", .version = {1, 3, 0}, .module = THIS_MODULE, -- GitLab From 7e70218c269964a3e9139735e75245eaff8a8a88 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 28 Mar 2016 14:41:21 -0700 Subject: [PATCH 1029/1442] ANDROID: dm: Minor cleanup Compacts the linear device arguments removing the unnecessary variables. Bug: 27175947 Change-Id: I157170eebe3c0f89a68ae05870a1060f188d0da0 Signed-off-by: Badhri Jagan Sridharan --- drivers/md/dm-android-verity.c | 7 ++----- drivers/md/dm-android-verity.h | 2 ++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index db4ddf789a39..f6ddbee5e2d3 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -572,13 +572,10 @@ static inline bool test_mult_overflow(sector_t a, u32 b) static int add_as_linear_device(struct dm_target *ti, char *dev) { /*Move to linear mapping defines*/ - char *linear_table_args[DM_LINEAR_ARGS]; - char offset[] = "0"; + char *linear_table_args[DM_LINEAR_ARGS] = {dev, + DM_LINEAR_TARGET_OFFSET}; int err = 0; - linear_table_args[0] = dev; - linear_table_args[1] = offset; - android_verity_target.dtr = linear_target.dtr, android_verity_target.map = linear_target.map, android_verity_target.status = linear_target.status, diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index 2cf7de1b7910..fe53863c664b 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -46,6 +46,8 @@ #define DM_MSG_PREFIX "android-verity" #define DM_LINEAR_ARGS 2 +#define DM_LINEAR_TARGET_OFFSET "0" + /* * There can be two formats. * if fec is present -- GitLab From 67584ff8412b7910e4ca5c8d7a0c36f51b4d3e96 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 5 Apr 2016 11:18:16 -0700 Subject: [PATCH 1030/1442] ANDROID: dm: rename dm-linear methods for dm-android-verity This keeps linear_target as static variable and just exposes the linear target methods for android-verity Cherry-picked: https://android-review.googlesource.com/#/c/212858 Change-Id: I4a377e417b00afd9ecccdb3e605fea31a7df112e Signed-off-by: Badhri Jagan Sridharan (cherry picked from commit a6d1b091f40b25d97849487e29ec097bc5f568dd) --- drivers/md/dm-android-verity.c | 14 +++++++------- drivers/md/dm-android-verity.h | 12 ++++++++++++ drivers/md/dm-linear.c | 26 +++++++++++++------------- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index f6ddbee5e2d3..b7e059595f75 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -576,15 +576,15 @@ static int add_as_linear_device(struct dm_target *ti, char *dev) DM_LINEAR_TARGET_OFFSET}; int err = 0; - android_verity_target.dtr = linear_target.dtr, - android_verity_target.map = linear_target.map, - android_verity_target.status = linear_target.status, - android_verity_target.ioctl = linear_target.ioctl, - android_verity_target.merge = linear_target.merge, - android_verity_target.iterate_devices = linear_target.iterate_devices, + android_verity_target.dtr = dm_linear_dtr, + android_verity_target.map = dm_linear_map, + android_verity_target.status = dm_linear_status, + android_verity_target.ioctl = dm_linear_ioctl, + android_verity_target.merge = dm_linear_merge, + android_verity_target.iterate_devices = dm_linear_iterate_devices, android_verity_target.io_hints = NULL; - err = linear_target.ctr(ti, DM_LINEAR_ARGS, linear_table_args); + err = dm_linear_ctr(ti, DM_LINEAR_ARGS, linear_table_args); if (!err) { DMINFO("Added android-verity as a linear target"); diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index fe53863c664b..efb796524896 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -94,4 +94,16 @@ struct bio_read { }; extern struct target_type linear_target; + +extern void dm_linear_dtr(struct dm_target *ti); +extern int dm_linear_map(struct dm_target *ti, struct bio *bio); +extern void dm_linear_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen); +extern int dm_linear_ioctl(struct dm_target *ti, unsigned int cmd, + unsigned long arg); +extern int dm_linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size); +extern int dm_linear_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data); +extern int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv); #endif /* DM_ANDROID_VERITY_H */ diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 6376fd08d2e9..1e6375067de8 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -25,7 +25,7 @@ struct linear_c { /* * Construct a linear mapping: */ -static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) +int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct linear_c *lc; unsigned long long tmp; @@ -67,7 +67,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) return ret; } -static void linear_dtr(struct dm_target *ti) +void dm_linear_dtr(struct dm_target *ti) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -92,14 +92,14 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) linear_map_sector(ti, bio->bi_iter.bi_sector); } -static int linear_map(struct dm_target *ti, struct bio *bio) +int dm_linear_map(struct dm_target *ti, struct bio *bio) { linear_map_bio(ti, bio); return DM_MAPIO_REMAPPED; } -static void linear_status(struct dm_target *ti, status_type_t type, +void dm_linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -116,7 +116,7 @@ static void linear_status(struct dm_target *ti, status_type_t type, } } -static int linear_prepare_ioctl(struct dm_target *ti, +static int dm_linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev, fmode_t *mode) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -133,7 +133,7 @@ static int linear_prepare_ioctl(struct dm_target *ti, return 0; } -static int linear_iterate_devices(struct dm_target *ti, +int dm_linear_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct linear_c *lc = ti->private; @@ -159,16 +159,16 @@ static long linear_direct_access(struct dm_target *ti, sector_t sector, return ret; } -struct target_type linear_target = { +static struct target_type linear_target = { .name = "linear", .version = {1, 3, 0}, .module = THIS_MODULE, - .ctr = linear_ctr, - .dtr = linear_dtr, - .map = linear_map, - .status = linear_status, - .prepare_ioctl = linear_prepare_ioctl, - .iterate_devices = linear_iterate_devices, + .ctr = dm_linear_ctr, + .dtr = dm_linear_dtr, + .map = dm_linear_map, + .status = dm_linear_status, + .prepare_ioctl = dm_linear_prepare_ioctl, + .iterate_devices = dm_linear_iterate_devices, .direct_access = linear_direct_access, }; -- GitLab From 0b768a416ebe591ab2592969e75284c894a4efe2 Mon Sep 17 00:00:00 2001 From: Jeremy Compostella Date: Fri, 15 Apr 2016 13:32:54 +0200 Subject: [PATCH 1031/1442] ANDROID: dm: use name_to_dev_t This patch makes android_verity_ctr() parse its block device string parameter with name_to_dev_t(). It allows the use of less hardware related block device reference like PARTUUID for instance. Change-Id: Idb84453e70cc11abd5ef3a0adfbb16f8b5feaf07 Signed-off-by: Jeremy Compostella --- drivers/md/dm-android-verity.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index b7e059595f75..9c26cbb5f179 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -613,8 +613,7 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) /* One for specifying number of opt args and one for mode */ sector_t data_sectors; u32 data_block_size; - unsigned int major, minor, - no_of_args = VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS; + unsigned int no_of_args = VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS; struct fec_header uninitialized_var(fec); struct fec_ecc_metadata uninitialized_var(ecc); char buf[FEC_ARG_LENGTH], *buf_ptr; @@ -630,13 +629,11 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) key_id = argv[0]; strreplace(argv[0], '#', ' '); - if (sscanf(argv[1], "%u:%u%c", &major, &minor, &dummy) == 2) { - dev = MKDEV(major, minor); - if (MAJOR(dev) != major || MINOR(dev) != minor) { - DMERR("Incorrect bdev major minor number"); - handle_error(); - return -EOVERFLOW; - } + dev = name_to_dev_t(argv[1]); + if (!dev) { + DMERR("no dev found for %s", argv[1]); + handle_error(); + return -EINVAL; } DMINFO("key:%s dev:%s", argv[0], argv[1]); -- GitLab From e96affa0f09d16e79fc55e34516d1a71ae2a0e29 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 20 May 2016 16:44:19 -0700 Subject: [PATCH 1032/1442] ANDROID: dm: fix signature verification flag The bug was that the signature verification was only happening when verity was disabled. It should always happen when verity is enabled. Signed-off-by: Badhri Jagan Sridharan Change-Id: I2d9354e240d36ea06fc68c2d18d8e87b823a4c2f (cherry picked from commit 5364b5ca0b1a12a58283b51408e43fc36d4e4fe7) --- drivers/md/dm-android-verity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index 9c26cbb5f179..00275a986d03 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -667,7 +667,7 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto free_metadata; } - if (!verity_enabled) { + if (verity_enabled) { err = verify_verity_signature(key_id, metadata); if (err) { -- GitLab From adb82b96cbffa39521c2220dc24e22d89a70e80d Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 20 May 2016 16:45:45 -0700 Subject: [PATCH 1033/1442] ANDROID: dm: use default verity public key If the dm-android-verity target does not provide a default key try using the default public key from the system keyring. The defualt verity keyid is passed as a kernel command line argument veritykeyid=. The order of the dm-android-verity params have been reversed to facilitate the change. Old format example: dm="system none ro,0 1 android-verity Android:#7e4333f9bba00adfe0ede979e28ed1920492b40f /dev/mmcblk0p43" New formats supported: dm="system none ro,0 1 android-verity /dev/mmcblk0p43 Android:#7e4333f9bba00adfe0ede979e28ed1920492b40f" (or) dm="system none ro,0 1 android-verity /dev/mmcblk0p43" when veritykeyid= is set in the kernel command line. BUG: 28384658 Signed-off-by: Badhri Jagan Sridharan Change-Id: I506c89b053d835ab579e703eef2bc1f8487250de (cherry picked from commit c5c74d0327729f35b576564976885596c6d0e7fb) --- drivers/md/dm-android-verity.c | 67 ++++++++++++++++++++++++---------- drivers/md/dm-android-verity.h | 16 ++++++++ 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index 00275a986d03..097fb2b1de89 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -43,6 +43,7 @@ static char verifiedbootstate[VERITY_COMMANDLINE_PARAM_LENGTH]; static char veritymode[VERITY_COMMANDLINE_PARAM_LENGTH]; +static char veritykeyid[VERITY_DEFAULT_KEY_ID_LENGTH]; static bool target_added; static bool verity_enabled = true; @@ -79,6 +80,19 @@ static int __init verity_mode_param(char *line) __setup("androidboot.veritymode=", verity_mode_param); +static int __init verity_keyid_param(char *line) +{ + strlcpy(veritykeyid, line, sizeof(veritykeyid)); + return 1; +} + +__setup("veritykeyid=", verity_keyid_param); + +static inline bool default_verity_key_id(void) +{ + return veritykeyid[0] != '\0'; +} + static int table_extract_mpi_array(struct public_key_signature *pks, const void *data, size_t len) { @@ -608,7 +622,7 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) dev_t uninitialized_var(dev); struct android_metadata *uninitialized_var(metadata); int err = 0, i, mode; - char *key_id, *table_ptr, dummy, + char *key_id, *table_ptr, dummy, *target_device, *verity_table_args[VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS]; /* One for specifying number of opt args and one for mode */ sector_t data_sectors; @@ -619,24 +633,34 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) char buf[FEC_ARG_LENGTH], *buf_ptr; unsigned long long tmpll; - if (argc != 2) { + if (argc == 1) { + /* Use the default keyid */ + if (default_verity_key_id()) + key_id = veritykeyid; + else { + DMERR("veritykeyid= is not set"); + handle_error(); + return -EINVAL; + } + } else if (argc == 2) + key_id = argv[1]; + else { DMERR("Incorrect number of arguments"); handle_error(); return -EINVAL; } - /* should come as one of the arguments for the verity target */ - key_id = argv[0]; - strreplace(argv[0], '#', ' '); + strreplace(key_id, '#', ' '); + target_device = argv[0]; - dev = name_to_dev_t(argv[1]); + dev = name_to_dev_t(target_device); if (!dev) { - DMERR("no dev found for %s", argv[1]); + DMERR("no dev found for %s", target_device); handle_error(); return -EINVAL; } - DMINFO("key:%s dev:%s", argv[0], argv[1]); + DMINFO("key:%s dev:%s", key_id, target_device); if (extract_fec_header(dev, &fec, &ecc)) { DMERR("Error while extracting fec header"); @@ -734,30 +758,33 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) /* Setup linear target and free */ if (!verity_enabled) { - err = add_as_linear_device(ti, argv[1]); + err = add_as_linear_device(ti, target_device); goto free_metadata; } /*substitute data_dev and hash_dev*/ - verity_table_args[1] = argv[1]; - verity_table_args[2] = argv[1]; + verity_table_args[1] = target_device; + verity_table_args[2] = target_device; mode = verity_mode(); if (ecc.valid && IS_BUILTIN(CONFIG_DM_VERITY_FEC)) { if (mode) { err = snprintf(buf, FEC_ARG_LENGTH, - "%u %s " VERITY_TABLE_OPT_FEC_FORMAT, - 1 + VERITY_TABLE_OPT_FEC_ARGS, - mode == DM_VERITY_MODE_RESTART ? - VERITY_TABLE_OPT_RESTART : VERITY_TABLE_OPT_LOGGING, - argv[1], ecc.start / FEC_BLOCK_SIZE, ecc.blocks, - ecc.roots); + "%u %s " VERITY_TABLE_OPT_FEC_FORMAT, + 1 + VERITY_TABLE_OPT_FEC_ARGS, + mode == DM_VERITY_MODE_RESTART ? + VERITY_TABLE_OPT_RESTART : + VERITY_TABLE_OPT_LOGGING, + target_device, + ecc.start / FEC_BLOCK_SIZE, ecc.blocks, + ecc.roots); } else { err = snprintf(buf, FEC_ARG_LENGTH, - "%u " VERITY_TABLE_OPT_FEC_FORMAT, - VERITY_TABLE_OPT_FEC_ARGS, argv[1], - ecc.start / FEC_BLOCK_SIZE, ecc.blocks, ecc.roots); + "%u " VERITY_TABLE_OPT_FEC_FORMAT, + VERITY_TABLE_OPT_FEC_ARGS, target_device, + ecc.start / FEC_BLOCK_SIZE, ecc.blocks, + ecc.roots); } } else if (mode) { err = snprintf(buf, FEC_ARG_LENGTH, diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index efb796524896..43655ee0f813 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -27,6 +27,22 @@ #define VERITY_TABLE_ARGS 10 #define VERITY_COMMANDLINE_PARAM_LENGTH 20 +/* + * : is the format for the identifier. + * subject can either be the Common Name(CN) + Organization Name(O) or + * just the CN if the it is prefixed with O + * From https://tools.ietf.org/html/rfc5280#appendix-A + * ub-organization-name-length INTEGER ::= 64 + * ub-common-name-length INTEGER ::= 64 + * + * http://lxr.free-electrons.com/source/crypto/asymmetric_keys/x509_cert_parser.c?v=3.9#L278 + * ctx->o_size + 2 + ctx->cn_size + 1 + * + 41 characters for ":" and sha1 id + * 64 + 2 + 64 + 1 + 1 + 40 (172) + * setting VERITY_DEFAULT_KEY_ID_LENGTH to 200 characters. + */ +#define VERITY_DEFAULT_KEY_ID_LENGTH 200 + #define FEC_MAGIC 0xFECFECFE #define FEC_BLOCK_SIZE (4 * 1024) #define FEC_VERSION 0 -- GitLab From c68a61580a6c4dcd4c3a96ce9de987a88ebd9c5d Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 17 Jun 2016 18:54:35 -0700 Subject: [PATCH 1034/1442] ANDROID: dm: mount as linear target if eng build eng builds dont have verity enabled i.e it does even have verity metadata appended to the parition. Therefore add rootdev as linear device and map the entire partition if build variant is "eng". (Cherry-picked based on https://partner-android-review.git.corp.google.com/#/c/618690/) BUG: 29276559 Signed-off-by: Badhri Jagan Sridharan Change-Id: I8f5c2289b842b820ca04f5773525e5449bb3f355 --- drivers/md/dm-android-verity.c | 62 +++++++++++++++++++++++++++++++--- drivers/md/dm-android-verity.h | 1 + 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index 097fb2b1de89..e1a8e284e7e4 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -44,6 +44,7 @@ static char verifiedbootstate[VERITY_COMMANDLINE_PARAM_LENGTH]; static char veritymode[VERITY_COMMANDLINE_PARAM_LENGTH]; static char veritykeyid[VERITY_DEFAULT_KEY_ID_LENGTH]; +static char buildvariant[BUILD_VARIANT]; static bool target_added; static bool verity_enabled = true; @@ -88,11 +89,26 @@ static int __init verity_keyid_param(char *line) __setup("veritykeyid=", verity_keyid_param); +static int __init verity_buildvariant(char *line) +{ + strlcpy(buildvariant, line, sizeof(buildvariant)); + return 1; +} + +__setup("buildvariant=", verity_buildvariant); + static inline bool default_verity_key_id(void) { return veritykeyid[0] != '\0'; } +static inline bool is_eng(void) +{ + static const char typeeng[] = "eng"; + + return !strncmp(buildvariant, typeeng, sizeof(typeeng)); +} + static int table_extract_mpi_array(struct public_key_signature *pks, const void *data, size_t len) { @@ -262,7 +278,7 @@ static int extract_fec_header(dev_t dev, struct fec_header *fec, bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); - if (IS_ERR(bdev)) { + if (IS_ERR_OR_NULL(bdev)) { DMERR("bdev get error"); return PTR_ERR(bdev); } @@ -323,6 +339,24 @@ static void find_metadata_offset(struct fec_header *fec, *metadata_offset = device_size - VERITY_METADATA_SIZE; } +static int find_size(dev_t dev, u64 *device_size) +{ + struct block_device *bdev; + + bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); + if (IS_ERR_OR_NULL(bdev)) { + DMERR("blkdev_get_by_dev failed"); + return PTR_ERR(bdev); + } + + *device_size = i_size_read(bdev->bd_inode); + *device_size >>= SECTOR_SHIFT; + + DMINFO("blkdev size in sectors: %llu", *device_size); + blkdev_put(bdev, FMODE_READ); + return 0; +} + static struct android_metadata *extract_metadata(dev_t dev, struct fec_header *fec) { @@ -337,7 +371,7 @@ static struct android_metadata *extract_metadata(dev_t dev, bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); - if (IS_ERR(bdev)) { + if (IS_ERR_OR_NULL(bdev)) { DMERR("blkdev_get_by_dev failed"); return ERR_CAST(bdev); } @@ -632,12 +666,13 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) struct fec_ecc_metadata uninitialized_var(ecc); char buf[FEC_ARG_LENGTH], *buf_ptr; unsigned long long tmpll; + u64 device_size; if (argc == 1) { /* Use the default keyid */ if (default_verity_key_id()) key_id = veritykeyid; - else { + else if (!is_eng()) { DMERR("veritykeyid= is not set"); handle_error(); return -EINVAL; @@ -650,7 +685,6 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) return -EINVAL; } - strreplace(key_id, '#', ' '); target_device = argv[0]; dev = name_to_dev_t(target_device); @@ -660,6 +694,26 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) return -EINVAL; } + if (is_eng()) { + err = find_size(dev, &device_size); + if (err) { + DMERR("error finding bdev size"); + handle_error(); + return err; + } + + ti->len = device_size; + err = add_as_linear_device(ti, target_device); + if (err) { + handle_error(); + return err; + } + verity_enabled = false; + return 0; + } + + strreplace(key_id, '#', ' '); + DMINFO("key:%s dev:%s", key_id, target_device); if (extract_fec_header(dev, &fec, &ecc)) { diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index 43655ee0f813..782e1c815c67 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -26,6 +26,7 @@ #define VERITY_METADATA_SIZE (8 * DATA_BLOCK_SIZE) #define VERITY_TABLE_ARGS 10 #define VERITY_COMMANDLINE_PARAM_LENGTH 20 +#define BUILD_VARIANT 20 /* * : is the format for the identifier. -- GitLab From d5943e06661dd67bb12585914cc7e951f9a746c3 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 27 Jun 2016 16:25:55 -0700 Subject: [PATCH 1035/1442] ANDROID: dm: allow adb disable-verity only in userdebug adb disable-verity was allowed when the phone is in the unlocked state. Since the driver is now aware of the build variant, honor "adb disable-verity" only in userdebug builds. (Cherry-picked from https://partner-android-review.git.corp.google.com/#/c/622117) BUG: 29276559 Signed-off-by: Badhri Jagan Sridharan Change-Id: I7ce9f38d8c7a62361392c5a8ccebb288f8a3a2ea --- drivers/md/dm-android-verity.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index e1a8e284e7e4..999e75bf2ba0 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -109,6 +109,14 @@ static inline bool is_eng(void) return !strncmp(buildvariant, typeeng, sizeof(typeeng)); } +static inline bool is_userdebug(void) +{ + static const char typeuserdebug[] = "userdebug"; + + return !strncmp(buildvariant, typeuserdebug, sizeof(typeuserdebug)); +} + + static int table_extract_mpi_array(struct public_key_signature *pks, const void *data, size_t len) { @@ -499,19 +507,6 @@ const char *find_dt_value(const char *name) return value; } -static bool is_unlocked(void) -{ - static const char unlocked[] = "orange"; - static const char verified_boot_prop[] = "verifiedbootstate"; - const char *value; - - value = find_dt_value(verified_boot_prop); - if (!value) - value = verifiedbootstate; - - return !strncmp(value, unlocked, sizeof(unlocked) - 1); -} - static int verity_mode(void) { static const char enforcing[] = "enforcing"; @@ -531,7 +526,7 @@ static int verify_header(struct android_metadata_header *header) { int retval = -EINVAL; - if (is_unlocked() && le32_to_cpu(header->magic_number) == + if (is_userdebug() && le32_to_cpu(header->magic_number) == VERITY_METADATA_MAGIC_DISABLE) { retval = VERITY_STATE_DISABLE; return retval; -- GitLab From 76350642a60db8205707b28d71d508d6ab1208ce Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 6 Jul 2016 17:16:19 -0700 Subject: [PATCH 1036/1442] ANDROID: dm: android-verity: Verify header before fetching table Move header validation logic before reading the verity_table as an invalid header implies the table is invalid as well. (Cherry-picked from: https://partner-android-review.git.corp.google.com/#/c/625203) BUG: 29940612 Signed-off-by: Badhri Jagan Sridharan Change-Id: Ib34d25c0854202f3e70df0a6d0ef1d96f0250c8e --- drivers/md/dm-android-verity.c | 140 +++++++++++++++++---------------- 1 file changed, 71 insertions(+), 69 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index 999e75bf2ba0..1f4eb099209d 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -365,12 +365,38 @@ static int find_size(dev_t dev, u64 *device_size) return 0; } -static struct android_metadata *extract_metadata(dev_t dev, - struct fec_header *fec) +static int verify_header(struct android_metadata_header *header) +{ + int retval = -EINVAL; + + if (is_userdebug() && le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_DISABLE) + return VERITY_STATE_DISABLE; + + if (!(le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_NUMBER) || + (le32_to_cpu(header->magic_number) == + VERITY_METADATA_MAGIC_DISABLE)) { + DMERR("Incorrect magic number"); + return retval; + } + + if (le32_to_cpu(header->protocol_version) != + VERITY_METADATA_VERSION) { + DMERR("Unsupported version %u", + le32_to_cpu(header->protocol_version)); + return retval; + } + + return 0; +} + +static int extract_metadata(dev_t dev, struct fec_header *fec, + struct android_metadata **metadata, + bool *verity_enabled) { struct block_device *bdev; struct android_metadata_header *header; - struct android_metadata *uninitialized_var(metadata); int i; u32 table_length, copy_length, offset; u64 metadata_offset; @@ -381,7 +407,7 @@ static struct android_metadata *extract_metadata(dev_t dev, if (IS_ERR_OR_NULL(bdev)) { DMERR("blkdev_get_by_dev failed"); - return ERR_CAST(bdev); + return -ENODEV; } find_metadata_offset(fec, bdev, &metadata_offset); @@ -399,7 +425,6 @@ static struct android_metadata *extract_metadata(dev_t dev, (1 << SECTOR_SHIFT), VERITY_METADATA_SIZE); if (err) { DMERR("Error while reading verity metadata"); - metadata = ERR_PTR(err); goto blkdev_release; } @@ -418,24 +443,42 @@ static struct android_metadata *extract_metadata(dev_t dev, le32_to_cpu(header->protocol_version), le32_to_cpu(header->table_length)); - metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); - if (!metadata) { + err = verify_header(header); + + if (err == VERITY_STATE_DISABLE) { + DMERR("Mounting root with verity disabled"); + *verity_enabled = false; + /* we would still have to read the metadata to figure out + * the data blocks size. Or may be could map the entire + * partition similar to mounting the device. + * + * Reset error as well as the verity_enabled flag is changed. + */ + err = 0; + } else if (err) + goto free_header; + + *metadata = kzalloc(sizeof(**metadata), GFP_KERNEL); + if (!*metadata) { DMERR("kzalloc for metadata failed"); err = -ENOMEM; goto free_header; } - metadata->header = header; + (*metadata)->header = header; table_length = le32_to_cpu(header->table_length); if (table_length == 0 || table_length > (VERITY_METADATA_SIZE - - sizeof(struct android_metadata_header))) + sizeof(struct android_metadata_header))) { + DMERR("table_length too long"); + err = -EINVAL; goto free_metadata; + } - metadata->verity_table = kzalloc(table_length + 1, GFP_KERNEL); + (*metadata)->verity_table = kzalloc(table_length + 1, GFP_KERNEL); - if (!metadata->verity_table) { + if (!(*metadata)->verity_table) { DMERR("kzalloc verity_table failed"); err = -ENOMEM; goto free_metadata; @@ -443,13 +486,15 @@ static struct android_metadata *extract_metadata(dev_t dev, if (sizeof(struct android_metadata_header) + table_length <= PAGE_SIZE) { - memcpy(metadata->verity_table, page_address(payload.page_io[0]) + memcpy((*metadata)->verity_table, + page_address(payload.page_io[0]) + sizeof(struct android_metadata_header), table_length); } else { copy_length = PAGE_SIZE - sizeof(struct android_metadata_header); - memcpy(metadata->verity_table, page_address(payload.page_io[0]) + memcpy((*metadata)->verity_table, + page_address(payload.page_io[0]) + sizeof(struct android_metadata_header), copy_length); table_length -= copy_length; @@ -457,13 +502,13 @@ static struct android_metadata *extract_metadata(dev_t dev, i = 1; while (table_length != 0) { if (table_length > PAGE_SIZE) { - memcpy(metadata->verity_table + offset, + memcpy((*metadata)->verity_table + offset, page_address(payload.page_io[i]), PAGE_SIZE); offset += PAGE_SIZE; table_length -= PAGE_SIZE; } else { - memcpy(metadata->verity_table + offset, + memcpy((*metadata)->verity_table + offset, page_address(payload.page_io[i]), table_length); table_length = 0; @@ -471,25 +516,23 @@ static struct android_metadata *extract_metadata(dev_t dev, i++; } } - metadata->verity_table[table_length] = '\0'; + (*metadata)->verity_table[table_length] = '\0'; + DMINFO("verity_table: %s", (*metadata)->verity_table); goto free_payload; free_metadata: - kfree(metadata); + kfree(*metadata); free_header: kfree(header); - metadata = ERR_PTR(err); free_payload: for (i = 0; i < payload.number_of_pages; i++) if (payload.page_io[i]) __free_page(payload.page_io[i]); kfree(payload.page_io); - - DMINFO("verity_table: %s", metadata->verity_table); blkdev_release: blkdev_put(bdev, FMODE_READ); - return metadata; + return err; } /* helper functions to extract properties from dts */ @@ -522,34 +565,6 @@ static int verity_mode(void) return DM_VERITY_MODE_EIO; } -static int verify_header(struct android_metadata_header *header) -{ - int retval = -EINVAL; - - if (is_userdebug() && le32_to_cpu(header->magic_number) == - VERITY_METADATA_MAGIC_DISABLE) { - retval = VERITY_STATE_DISABLE; - return retval; - } - - if (!(le32_to_cpu(header->magic_number) == - VERITY_METADATA_MAGIC_NUMBER) || - (le32_to_cpu(header->magic_number) == - VERITY_METADATA_MAGIC_DISABLE)) { - DMERR("Incorrect magic number"); - return retval; - } - - if (le32_to_cpu(header->protocol_version) != - VERITY_METADATA_VERSION) { - DMERR("Unsupported version %u", - le32_to_cpu(header->protocol_version)); - return retval; - } - - return 0; -} - static int verify_verity_signature(char *key_id, struct android_metadata *metadata) { @@ -649,7 +664,7 @@ static int add_as_linear_device(struct dm_target *ti, char *dev) static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) { dev_t uninitialized_var(dev); - struct android_metadata *uninitialized_var(metadata); + struct android_metadata *metadata = NULL; int err = 0, i, mode; char *key_id, *table_ptr, dummy, *target_device, *verity_table_args[VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS]; @@ -717,26 +732,11 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) return -EINVAL; } - metadata = extract_metadata(dev, &fec); + err = extract_metadata(dev, &fec, &metadata, &verity_enabled); - if (IS_ERR(metadata)) { + if (err) { DMERR("Error while extracting metadata"); handle_error(); - return -EINVAL; - } - - err = verify_header(metadata->header); - - if (err == VERITY_STATE_DISABLE) { - DMERR("Mounting root with verity disabled"); - verity_enabled = false; - /* we would still have to parse the args to figure out - * the data blocks size. Or may be could map the entire - * partition similar to mounting the device. - */ - } else if (err) { - DMERR("Verity header handle error"); - handle_error(); goto free_metadata; } @@ -869,8 +869,10 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } free_metadata: - kfree(metadata->header); - kfree(metadata->verity_table); + if (metadata) { + kfree(metadata->header); + kfree(metadata->verity_table); + } kfree(metadata); return err; } -- GitLab From 36759d569e2fbcfa2029118a5a893bdad56d7c37 Mon Sep 17 00:00:00 2001 From: Jeremy Compostella Date: Tue, 10 May 2016 13:10:20 +0200 Subject: [PATCH 1037/1442] ANDROID: dm verity fec: pack the fec_header structure The fec_header structure is generated build time and stored on disk. The fec_header might be build on a 64 bits machine while it is read per a 32 bits device or the other way around. In such situations, the fec_header fields are not aligned as expected by the device and it fails to read the fec_header structure. This patch makes the fec_header packed. Change-Id: Idb84453e70cc11abd5ef3a0adfbb16f8b5feaf06 Signed-off-by: Jeremy Compostella --- drivers/md/dm-android-verity.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index 782e1c815c67..f43b02fbb475 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -72,9 +72,6 @@ * if fec is not present * */ -/* TODO: rearrange structure to reduce memory holes - * depends on userspace change. - */ struct fec_header { __le32 magic; __le32 version; @@ -83,7 +80,7 @@ struct fec_header { __le32 fec_size; __le64 inp_size; u8 hash[SHA256_DIGEST_SIZE]; -}; +} __attribute__((packed)); struct android_metadata_header { __le32 magic_number; -- GitLab From 424861b54be576896b8c9cdc63596b2168aee05c Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 9 Aug 2016 12:47:37 -0700 Subject: [PATCH 1038/1442] ANDROID: dm-verity: adopt changes made to dm callbacks v4.4 introduced changes to the callbacks used for dm-linear and dm-verity-target targets. Move to those headers in dm-android-verity. Verified on hikey while having BOARD_USES_RECOVERY_AS_BOOT := true BOARD_BUILD_SYSTEM_ROOT_IMAGE := true BUG: 27339727 Signed-off-by: Badhri Jagan Sridharan Change-Id: Ic64950c3b55f0a6eaa570bcedc2ace83bbf3005e --- drivers/md/dm-android-verity.c | 12 +++++------- drivers/md/dm-android-verity.h | 6 ++---- drivers/md/dm-linear.c | 2 +- drivers/md/dm-verity-target.c | 2 +- drivers/md/dm-verity.h | 6 ++---- 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index 1f4eb099209d..15ce2a81c1f4 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -59,8 +59,7 @@ static struct target_type android_verity_target = { .dtr = verity_dtr, .map = verity_map, .status = verity_status, - .ioctl = verity_ioctl, - .merge = verity_merge, + .prepare_ioctl = verity_prepare_ioctl, .iterate_devices = verity_iterate_devices, .io_hints = verity_io_hints, }; @@ -637,8 +636,7 @@ static int add_as_linear_device(struct dm_target *ti, char *dev) android_verity_target.dtr = dm_linear_dtr, android_verity_target.map = dm_linear_map, android_verity_target.status = dm_linear_status, - android_verity_target.ioctl = dm_linear_ioctl, - android_verity_target.merge = dm_linear_merge, + android_verity_target.prepare_ioctl = dm_linear_prepare_ioctl, android_verity_target.iterate_devices = dm_linear_iterate_devices, android_verity_target.io_hints = NULL; @@ -676,7 +674,7 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv) struct fec_ecc_metadata uninitialized_var(ecc); char buf[FEC_ARG_LENGTH], *buf_ptr; unsigned long long tmpll; - u64 device_size; + u64 uninitialized_var(device_size); if (argc == 1) { /* Use the default keyid */ @@ -896,7 +894,7 @@ static int __init dm_android_verity_init(void) } file = debugfs_create_bool("target_added", S_IRUGO, debug_dir, - (u32 *)&target_added); + &target_added); if (IS_ERR_OR_NULL(file)) { DMERR("Cannot create android_verity debugfs directory: %ld", @@ -906,7 +904,7 @@ static int __init dm_android_verity_init(void) } file = debugfs_create_bool("verity_enabled", S_IRUGO, debug_dir, - (u32 *)&verity_enabled); + &verity_enabled); if (IS_ERR_OR_NULL(file)) { DMERR("Cannot create android_verity debugfs directory: %ld", diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index f43b02fbb475..0c7ff6afec69 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -113,10 +113,8 @@ extern void dm_linear_dtr(struct dm_target *ti); extern int dm_linear_map(struct dm_target *ti, struct bio *bio); extern void dm_linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen); -extern int dm_linear_ioctl(struct dm_target *ti, unsigned int cmd, - unsigned long arg); -extern int dm_linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size); +extern int dm_linear_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode); extern int dm_linear_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data); extern int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 1e6375067de8..8480c84139ea 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -116,7 +116,7 @@ void dm_linear_status(struct dm_target *ti, status_type_t type, } } -static int dm_linear_prepare_ioctl(struct dm_target *ti, +int dm_linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev, fmode_t *mode) { struct linear_c *lc = (struct linear_c *) ti->private; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 10362700a00c..463f9a4ad2de 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -656,7 +656,7 @@ void verity_status(struct dm_target *ti, status_type_t type, } } -static int verity_prepare_ioctl(struct dm_target *ti, +int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev, fmode_t *mode) { struct dm_verity *v = ti->private; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index d9cf5e4939eb..75effca400a3 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -128,10 +128,8 @@ extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, extern void verity_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen); -extern int verity_ioctl(struct dm_target *ti, unsigned cmd, - unsigned long arg); -extern int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size); +extern int verity_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, fmode_t *mode); extern int verity_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data); extern void verity_io_hints(struct dm_target *ti, struct queue_limits *limits); -- GitLab From 921945748d6c00d65e81f05a8825b5f3c2975d67 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 23 Aug 2016 11:32:37 -0700 Subject: [PATCH 1039/1442] ANDROID: dm: android-verity: Allow android-verity to be compiled as an independent module Exports the device mapper callbacks of linear and dm-verity-target methods. Signed-off-by: Badhri Jagan Sridharan Change-Id: I0358be0615c431dce3cc78575aaac4ccfe3aacd7 --- drivers/md/Kconfig | 3 ++- drivers/md/Makefile | 5 +---- drivers/md/dm-linear.c | 6 ++++++ drivers/md/dm-verity-target.c | 7 +++++++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 8f85df687f95..f550da3beabd 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -501,7 +501,7 @@ config DM_LOG_WRITES If unsure, say N. config DM_ANDROID_VERITY - bool "Android verity target support" + tristate "Android verity target support" depends on DM_VERITY depends on X509_CERTIFICATE_PARSER depends on SYSTEM_TRUSTED_KEYRING @@ -509,6 +509,7 @@ config DM_ANDROID_VERITY depends on KEYS depends on ASYMMETRIC_KEY_TYPE depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE + depends on MD_LINEAR ---help--- This device-mapper target is virtually a VERITY target. This target is setup by reading the metadata contents piggybacked diff --git a/drivers/md/Makefile b/drivers/md/Makefile index fa5941fc4c76..f26ce41af389 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o obj-$(CONFIG_DM_ERA) += dm-era.o obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o +obj-$(CONFIG_DM_ANDROID_VERITY) += dm-android-verity.o ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o @@ -67,7 +68,3 @@ endif ifeq ($(CONFIG_DM_VERITY_FEC),y) dm-verity-objs += dm-verity-fec.o endif - -ifeq ($(CONFIG_DM_ANDROID_VERITY),y) -dm-verity-objs += dm-android-verity.o -endif diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 8480c84139ea..ee2f40a91132 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -66,6 +66,7 @@ int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) kfree(lc); return ret; } +EXPORT_SYMBOL_GPL(dm_linear_ctr); void dm_linear_dtr(struct dm_target *ti) { @@ -74,6 +75,7 @@ void dm_linear_dtr(struct dm_target *ti) dm_put_device(ti, lc->dev); kfree(lc); } +EXPORT_SYMBOL_GPL(dm_linear_dtr); static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) { @@ -98,6 +100,7 @@ int dm_linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } +EXPORT_SYMBOL_GPL(dm_linear_map); void dm_linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) @@ -115,6 +118,7 @@ void dm_linear_status(struct dm_target *ti, status_type_t type, break; } } +EXPORT_SYMBOL_GPL(dm_linear_status); int dm_linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev, fmode_t *mode) @@ -132,6 +136,7 @@ int dm_linear_prepare_ioctl(struct dm_target *ti, return 1; return 0; } +EXPORT_SYMBOL_GPL(dm_linear_prepare_ioctl); int dm_linear_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) @@ -140,6 +145,7 @@ int dm_linear_iterate_devices(struct dm_target *ti, return fn(ti, lc->dev, lc->start, ti->len, data); } +EXPORT_SYMBOL_GPL(dm_linear_iterate_devices); static long linear_direct_access(struct dm_target *ti, sector_t sector, void **kaddr, pfn_t *pfn, long size) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 463f9a4ad2de..b53539e26bc2 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -592,6 +592,7 @@ int verity_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } +EXPORT_SYMBOL_GPL(verity_map); /* * Status: V (valid) or C (corruption found) @@ -655,6 +656,7 @@ void verity_status(struct dm_target *ti, status_type_t type, break; } } +EXPORT_SYMBOL_GPL(verity_status); int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev, fmode_t *mode) @@ -668,6 +670,7 @@ int verity_prepare_ioctl(struct dm_target *ti, return 1; return 0; } +EXPORT_SYMBOL_GPL(verity_prepare_ioctl); int verity_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) @@ -676,6 +679,7 @@ int verity_iterate_devices(struct dm_target *ti, return fn(ti, v->data_dev, v->data_start, ti->len, data); } +EXPORT_SYMBOL_GPL(verity_iterate_devices); void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) { @@ -689,6 +693,7 @@ void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, limits->logical_block_size); } +EXPORT_SYMBOL_GPL(verity_io_hints); void verity_dtr(struct dm_target *ti) { @@ -719,6 +724,7 @@ void verity_dtr(struct dm_target *ti) kfree(v); } +EXPORT_SYMBOL_GPL(verity_dtr); static int verity_alloc_zero_digest(struct dm_verity *v) { @@ -1053,6 +1059,7 @@ int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) return r; } +EXPORT_SYMBOL_GPL(verity_ctr); static struct target_type verity_target = { .name = "verity", -- GitLab From 8613d932c763ec6fddb544aadfb87cf1ed3d595e Mon Sep 17 00:00:00 2001 From: Mohamad Ayyash Date: Wed, 11 May 2016 13:18:35 -0700 Subject: [PATCH 1040/1442] ANDROID: Don't show empty tag stats for unprivileged uids BUG: 27577101 BUG: 27532522 Change-Id: Ibee3c5d224f139b9312a40acb203e87aa7060797 Signed-off-by: Mohamad Ayyash --- net/netfilter/xt_qtaguid.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index e2e7d54f9bb1..3bf0c59dab2f 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1946,7 +1946,7 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) ); f_count = atomic_long_read( &sock_tag_entry->socket->file->f_count); - seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u " + seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u " "f_count=%lu\n", sock_tag_entry->sk, sock_tag_entry->tag, uid, @@ -2548,8 +2548,7 @@ static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry, uid_t stat_uid = get_uid_from_tag(tag); struct proc_print_info *ppi = m->private; /* Detailed tags are not available to everybody */ - if (get_atag_from_tag(tag) && !can_read_other_uid_stats( - make_kuid(&init_user_ns,stat_uid))) { + if (!can_read_other_uid_stats(make_kuid(&init_user_ns,stat_uid))) { CT_DEBUG("qtaguid: stats line: " "%s 0x%llx %u: insufficient priv " "from pid=%u tgid=%u uid=%u stats.gid=%u\n", -- GitLab From b3ea9acfdc84f307faf1288b60dc2f26e63bc247 Mon Sep 17 00:00:00 2001 From: Riley Andrews Date: Tue, 6 Sep 2016 15:16:25 -0700 Subject: [PATCH 1041/1442] ANDROID: cpuset: Make cpusets restore on hotplug This deliberately changes the behavior of the per-cpuset cpus file to not be effected by hotplug. When a cpu is offlined, it will be removed from the cpuset/cpus file. When a cpu is onlined, if the cpuset originally requested that that cpu was part of the cpuset, that cpu will be restored to the cpuset. The cpus files still have to be hierachical, but the ranges no longer have to be out of the currently online cpus, just the physically present cpus. Change-Id: I22cdf33e7d312117bcefba1aeb0125e1ada289a9 Signed-off-by: Dmitry Shmidt --- kernel/cpuset.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 29f815d2ef7e..f8b460f9d861 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -99,6 +99,7 @@ struct cpuset { /* user-configured CPUs and Memory Nodes allow to tasks */ cpumask_var_t cpus_allowed; + cpumask_var_t cpus_requested; nodemask_t mems_allowed; /* effective CPUs and Memory Nodes allow to tasks */ @@ -398,7 +399,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs, static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) { - return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && + return cpumask_subset(p->cpus_requested, q->cpus_requested) && nodes_subset(p->mems_allowed, q->mems_allowed) && is_cpu_exclusive(p) <= is_cpu_exclusive(q) && is_mem_exclusive(p) <= is_mem_exclusive(q); @@ -498,7 +499,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) cpuset_for_each_child(c, css, par) { if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && c != cur && - cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) + cpumask_intersects(trial->cpus_requested, c->cpus_requested)) goto out; if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && c != cur && @@ -957,17 +958,18 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (!*buf) { cpumask_clear(trialcs->cpus_allowed); } else { - retval = cpulist_parse(buf, trialcs->cpus_allowed); + retval = cpulist_parse(buf, trialcs->cpus_requested); if (retval < 0) return retval; - if (!cpumask_subset(trialcs->cpus_allowed, - top_cpuset.cpus_allowed)) + if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask)) return -EINVAL; + + cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask); } /* Nothing to do if the cpus didn't change */ - if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) + if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested)) return 0; retval = validate_change(cs, trialcs); @@ -976,6 +978,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); + cpumask_copy(cs->cpus_requested, trialcs->cpus_requested); spin_unlock_irq(&callback_lock); /* use trialcs->cpus_allowed as a temp variable */ @@ -1760,7 +1763,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) switch (type) { case FILE_CPULIST: - seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested)); break; case FILE_MEMLIST: seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); @@ -1949,11 +1952,14 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) return ERR_PTR(-ENOMEM); if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) goto free_cs; + if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL)) + goto free_allowed; if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL)) - goto free_cpus; + goto free_requested; set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); + cpumask_clear(cs->cpus_requested); nodes_clear(cs->mems_allowed); cpumask_clear(cs->effective_cpus); nodes_clear(cs->effective_mems); @@ -1962,7 +1968,9 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) return &cs->css; -free_cpus: +free_requested: + free_cpumask_var(cs->cpus_requested); +free_allowed: free_cpumask_var(cs->cpus_allowed); free_cs: kfree(cs); @@ -2025,6 +2033,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cs->mems_allowed = parent->mems_allowed; cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); + cpumask_copy(cs->cpus_requested, parent->cpus_requested); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: @@ -2059,6 +2068,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->cpus_allowed); + free_cpumask_var(cs->cpus_requested); kfree(cs); } @@ -2123,8 +2133,11 @@ int __init cpuset_init(void) BUG(); if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)) BUG(); + if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL)) + BUG(); cpumask_setall(top_cpuset.cpus_allowed); + cpumask_setall(top_cpuset.cpus_requested); nodes_setall(top_cpuset.mems_allowed); cpumask_setall(top_cpuset.effective_cpus); nodes_setall(top_cpuset.effective_mems); @@ -2258,7 +2271,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) goto retry; } - cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus); + cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus); nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems); cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); -- GitLab From 7e15dbf985571d3a9304cdfceb9612f9b18e3211 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Fri, 2 Sep 2016 16:12:06 -0700 Subject: [PATCH 1042/1442] ANDROID: input: keyreset: switch to orderly_reboot Prior restart function would make a call to sys_sync and then execute a kernel reset. Rather than call the sync directly, thus necessitating this driver to be builtin, call orderly_reboot, which will take care of the file system sync. Note: since CONFIG_INPUT Kconfig is tristate, this driver can be built as module, despite being marked bool. Signed-off-by: Eric Ernst --- drivers/input/keyreset.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c index 7fbf7247e65f..7e5222aec7c1 100644 --- a/drivers/input/keyreset.c +++ b/drivers/input/keyreset.c @@ -32,8 +32,7 @@ struct keyreset_state { static void do_restart(struct work_struct *unused) { - sys_sync(); - kernel_restart(NULL); + orderly_reboot(); } static void do_reset_fn(void *priv) -- GitLab From a33d9f9fa3339bdcfd23aeb25fd64c6e9a6affd6 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Sun, 18 Sep 2016 21:39:28 -0700 Subject: [PATCH 1043/1442] ANDROID: fiq_debugger: Pass task parameter to unwind_frame() Fixes: fe13f95b7200 ("arm64: pass a task parameter to unwind_frame()") Bug: 30369029 Patchset: rework-pagetable Change-Id: I9a4ab50ef61532d27282f189f063c938c196ec08 Signed-off-by: Jeff Vander Stoep --- drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c index 99c6584fcfa5..97246bcbcd62 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c @@ -197,6 +197,6 @@ void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output, frame.sp = regs->sp; frame.pc = regs->pc; output->printf(output, "\n"); - walk_stackframe(&frame, report_trace, &sts); + walk_stackframe(current, &frame, report_trace, &sts); } } -- GitLab From 07bec5d57d50e03f90c5f1f5b1a0dd5cfd9d3b00 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 27 Sep 2016 13:48:29 -0700 Subject: [PATCH 1044/1442] ANDROID: dm: android-verity: Remove fec_header location constraint This CL removes the mandate of the fec_header being located right after the ECC data. (Cherry-picked from https://android-review.googlesource.com/#/c/280401) Bug: 28865197 Signed-off-by: Badhri Jagan Sridharan Change-Id: Ie04c8cf2dd755f54d02dbdc4e734a13d6f6507b5 --- drivers/md/dm-android-verity.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index 15ce2a81c1f4..bb6c1285e499 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -266,10 +266,7 @@ static inline int validate_fec_header(struct fec_header *header, u64 offset) le32_to_cpu(header->version) != FEC_VERSION || le32_to_cpu(header->size) != sizeof(struct fec_header) || le32_to_cpu(header->roots) == 0 || - le32_to_cpu(header->roots) >= FEC_RSM || - offset < le32_to_cpu(header->fec_size) || - offset - le32_to_cpu(header->fec_size) != - le64_to_cpu(header->inp_size)) + le32_to_cpu(header->roots) >= FEC_RSM) return -EINVAL; return 0; -- GitLab From 579a63bf28cfddfaf51a18e816ac735fab2c2236 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 6 Oct 2016 16:14:16 -0700 Subject: [PATCH 1045/1442] CHROMIUM: cgroups: relax permissions on moving tasks between cgroups Android expects system_server to be able to move tasks between different cgroups/cpusets, but does not want to be running as root. Let's relax permission check so that processes can move other tasks if they have CAP_SYS_NICE in the affected task's user namespace. BUG=b:31790445,chromium:647994 TEST=Boot android container, examine logcat Change-Id: Ia919c66ab6ed6a6daf7c4cf67feb38b13b1ad09b Signed-off-by: Dmitry Torokhov Reviewed-on: https://chromium-review.googlesource.com/394927 Reviewed-by: Ricky Zhou --- kernel/cgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 85bc9beb046d..c014c9b25b38 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2856,7 +2856,8 @@ static int cgroup_procs_write_permission(struct task_struct *task, */ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) + !uid_eq(cred->euid, tcred->suid) && + !ns_capable(tcred->user_ns, CAP_SYS_NICE)) ret = -EACCES; if (!ret && cgroup_on_dfl(dst_cgrp)) { -- GitLab From 8cc698d951997aaed1888bfd1167c8d0e2849970 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 18 Oct 2016 16:20:23 -0700 Subject: [PATCH 1046/1442] ANDROID: [RFC]cgroup: Change from CAP_SYS_NICE to CAP_SYS_RESOURCE for cgroup migration permissions Try to better match what we're pushing upstream, use CAP_SYS_RESOURCE instead of CAP_SYS_NICE, which shoudln't affect Android as Zygote and system_server already use CAP_SYS_RESOURCE. Change-Id: I9b7ba2d9be1a469c9636497a6287f840891a91a8 Signed-off-by: John Stultz --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c014c9b25b38..866059af6bc6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2857,7 +2857,7 @@ static int cgroup_procs_write_permission(struct task_struct *task, if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && !uid_eq(cred->euid, tcred->suid) && - !ns_capable(tcred->user_ns, CAP_SYS_NICE)) + !ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) ret = -EACCES; if (!ret && cgroup_on_dfl(dst_cgrp)) { -- GitLab From 00c803798e81b42483ba544b683d6f49654c0244 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Wed, 13 Jul 2016 12:06:49 +0200 Subject: [PATCH 1047/1442] ANDROID: binder: split flat_binder_object. flat_binder_object is used for both handling binder objects and file descriptors, even though the two are mostly independent. Since we'll have more fixup objects in binder in the future, instead of extending flat_binder_object again, split out file descriptors to their own object while retaining backwards compatibility to existing user-space clients. All binder objects just share a header. Change-Id: If3c55f27a2aa8f21815383e0e807be47895e4786 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 158 +++++++++++++++++++--------- include/uapi/linux/android/binder.h | 31 +++++- 2 files changed, 137 insertions(+), 52 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3c71b982bf2a..331d2abca9a2 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -145,6 +145,11 @@ module_param_call(stop_on_user_error, binder_set_stop_on_user_error, binder_stop_on_user_error = 2; \ } while (0) +#define to_flat_binder_object(hdr) \ + container_of(hdr, struct flat_binder_object, hdr) + +#define to_binder_fd_object(hdr) container_of(hdr, struct binder_fd_object, hdr) + enum binder_stat_types { BINDER_STAT_PROC, BINDER_STAT_THREAD, @@ -1240,6 +1245,47 @@ static void binder_send_failed_reply(struct binder_transaction *t, } } +/** + * binder_validate_object() - checks for a valid metadata object in a buffer. + * @buffer: binder_buffer that we're parsing. + * @offset: offset in the buffer at which to validate an object. + * + * Return: If there's a valid metadata object at @offset in @buffer, the + * size of that object. Otherwise, it returns zero. + */ +static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset) +{ + /* Check if we can read a header first */ + struct binder_object_header *hdr; + size_t object_size = 0; + + if (offset > buffer->data_size - sizeof(*hdr) || + buffer->data_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) + return 0; + + /* Ok, now see if we can read a complete object. */ + hdr = (struct binder_object_header *)(buffer->data + offset); + switch (hdr->type) { + case BINDER_TYPE_BINDER: + case BINDER_TYPE_WEAK_BINDER: + case BINDER_TYPE_HANDLE: + case BINDER_TYPE_WEAK_HANDLE: + object_size = sizeof(struct flat_binder_object); + break; + case BINDER_TYPE_FD: + object_size = sizeof(struct binder_fd_object); + break; + default: + return 0; + } + if (offset <= buffer->data_size - object_size && + buffer->data_size >= object_size) + return object_size; + else + return 0; +} + static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_buffer *buffer, binder_size_t *failed_at) @@ -1262,21 +1308,23 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, else off_end = (void *)offp + buffer->offsets_size; for (; offp < off_end; offp++) { - struct flat_binder_object *fp; + struct binder_object_header *hdr; + size_t object_size = binder_validate_object(buffer, *offp); - if (*offp > buffer->data_size - sizeof(*fp) || - buffer->data_size < sizeof(*fp) || - !IS_ALIGNED(*offp, sizeof(u32))) { - pr_err("transaction release %d bad offset %lld, size %zd\n", + if (object_size == 0) { + pr_err("transaction release %d bad object at offset %lld, size %zd\n", debug_id, (u64)*offp, buffer->data_size); continue; } - fp = (struct flat_binder_object *)(buffer->data + *offp); - switch (fp->type) { + hdr = (struct binder_object_header *)(buffer->data + *offp); + switch (hdr->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { - struct binder_node *node = binder_get_node(proc, fp->binder); + struct flat_binder_object *fp; + struct binder_node *node; + fp = to_flat_binder_object(hdr); + node = binder_get_node(proc, fp->binder); if (node == NULL) { pr_err("transaction release %d bad node %016llx\n", debug_id, (u64)fp->binder); @@ -1285,15 +1333,17 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%016llx\n", node->debug_id, (u64)node->ptr); - binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, 0); + binder_dec_node(node, hdr->type == BINDER_TYPE_BINDER, + 0); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { + struct flat_binder_object *fp; struct binder_ref *ref; + fp = to_flat_binder_object(hdr); ref = binder_get_ref(proc, fp->handle, - fp->type == BINDER_TYPE_HANDLE); - + hdr->type == BINDER_TYPE_HANDLE); if (ref == NULL) { pr_err("transaction release %d bad handle %d\n", debug_id, fp->handle); @@ -1302,19 +1352,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d (node %d)\n", ref->debug_id, ref->desc, ref->node->debug_id); - binder_dec_ref(ref, fp->type == BINDER_TYPE_HANDLE); + binder_dec_ref(ref, hdr->type == BINDER_TYPE_HANDLE); } break; - case BINDER_TYPE_FD: + case BINDER_TYPE_FD: { + struct binder_fd_object *fp = to_binder_fd_object(hdr); + binder_debug(BINDER_DEBUG_TRANSACTION, - " fd %d\n", fp->handle); + " fd %d\n", fp->fd); if (failed_at) - task_close_fd(proc, fp->handle); - break; + task_close_fd(proc, fp->fd); + } break; default: pr_err("transaction release %d bad object type %x\n", - debug_id, fp->type); + debug_id, hdr->type); break; } } @@ -1531,28 +1583,29 @@ static void binder_transaction(struct binder_proc *proc, off_end = (void *)offp + tr->offsets_size; off_min = 0; for (; offp < off_end; offp++) { - struct flat_binder_object *fp; + struct binder_object_header *hdr; + size_t object_size = binder_validate_object(t->buffer, *offp); - if (*offp > t->buffer->data_size - sizeof(*fp) || - *offp < off_min || - t->buffer->data_size < sizeof(*fp) || - !IS_ALIGNED(*offp, sizeof(u32))) { - binder_user_error("%d:%d got transaction with invalid offset, %lld (min %lld, max %lld)\n", + if (object_size == 0 || *offp < off_min) { + binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", proc->pid, thread->pid, (u64)*offp, (u64)off_min, - (u64)(t->buffer->data_size - - sizeof(*fp))); + (u64)t->buffer->data_size); return_error = BR_FAILED_REPLY; goto err_bad_offset; } - fp = (struct flat_binder_object *)(t->buffer->data + *offp); - off_min = *offp + sizeof(struct flat_binder_object); - switch (fp->type) { + + hdr = (struct binder_object_header *)(t->buffer->data + *offp); + off_min = *offp + object_size; + switch (hdr->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { + struct flat_binder_object *fp; + struct binder_node *node; struct binder_ref *ref; - struct binder_node *node = binder_get_node(proc, fp->binder); + fp = to_flat_binder_object(hdr); + node = binder_get_node(proc, fp->binder); if (node == NULL) { node = binder_new_node(proc, fp->binder, fp->cookie); if (node == NULL) { @@ -1580,14 +1633,14 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_binder_get_ref_for_node_failed; } - if (fp->type == BINDER_TYPE_BINDER) - fp->type = BINDER_TYPE_HANDLE; + if (hdr->type == BINDER_TYPE_BINDER) + hdr->type = BINDER_TYPE_HANDLE; else - fp->type = BINDER_TYPE_WEAK_HANDLE; + hdr->type = BINDER_TYPE_WEAK_HANDLE; fp->binder = 0; fp->handle = ref->desc; fp->cookie = 0; - binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE, + binder_inc_ref(ref, hdr->type == BINDER_TYPE_HANDLE, &thread->todo); trace_binder_transaction_node_to_ref(t, node, ref); @@ -1598,11 +1651,12 @@ static void binder_transaction(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { + struct flat_binder_object *fp; struct binder_ref *ref; + fp = to_flat_binder_object(hdr); ref = binder_get_ref(proc, fp->handle, - fp->type == BINDER_TYPE_HANDLE); - + hdr->type == BINDER_TYPE_HANDLE); if (ref == NULL) { binder_user_error("%d:%d got transaction with invalid handle, %d\n", proc->pid, @@ -1616,13 +1670,15 @@ static void binder_transaction(struct binder_proc *proc, goto err_binder_get_ref_failed; } if (ref->node->proc == target_proc) { - if (fp->type == BINDER_TYPE_HANDLE) - fp->type = BINDER_TYPE_BINDER; + if (hdr->type == BINDER_TYPE_HANDLE) + hdr->type = BINDER_TYPE_BINDER; else - fp->type = BINDER_TYPE_WEAK_BINDER; + hdr->type = BINDER_TYPE_WEAK_BINDER; fp->binder = ref->node->ptr; fp->cookie = ref->node->cookie; - binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL); + binder_inc_node(ref->node, + hdr->type == BINDER_TYPE_BINDER, + 0, NULL); trace_binder_transaction_ref_to_node(t, ref); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> node %d u%016llx\n", @@ -1639,7 +1695,9 @@ static void binder_transaction(struct binder_proc *proc, fp->binder = 0; fp->handle = new_ref->desc; fp->cookie = 0; - binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL); + binder_inc_ref(new_ref, + hdr->type == BINDER_TYPE_HANDLE, + NULL); trace_binder_transaction_ref_to_ref(t, ref, new_ref); binder_debug(BINDER_DEBUG_TRANSACTION, @@ -1652,25 +1710,26 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_FD: { int target_fd; struct file *file; + struct binder_fd_object *fp = to_binder_fd_object(hdr); if (reply) { if (!(in_reply_to->flags & TF_ACCEPT_FDS)) { binder_user_error("%d:%d got reply with fd, %d, but target does not allow fds\n", - proc->pid, thread->pid, fp->handle); + proc->pid, thread->pid, fp->fd); return_error = BR_FAILED_REPLY; goto err_fd_not_allowed; } } else if (!target_node->accept_fds) { binder_user_error("%d:%d got transaction with fd, %d, but target does not allow fds\n", - proc->pid, thread->pid, fp->handle); + proc->pid, thread->pid, fp->fd); return_error = BR_FAILED_REPLY; goto err_fd_not_allowed; } - file = fget(fp->handle); + file = fget(fp->fd); if (file == NULL) { binder_user_error("%d:%d got transaction with invalid fd, %d\n", - proc->pid, thread->pid, fp->handle); + proc->pid, thread->pid, fp->fd); return_error = BR_FAILED_REPLY; goto err_fget_failed; } @@ -1688,17 +1747,18 @@ static void binder_transaction(struct binder_proc *proc, goto err_get_unused_fd_failed; } task_fd_install(target_proc, target_fd, file); - trace_binder_transaction_fd(t, fp->handle, target_fd); + trace_binder_transaction_fd(t, fp->fd, target_fd); binder_debug(BINDER_DEBUG_TRANSACTION, - " fd %d -> %d\n", fp->handle, target_fd); + " fd %d -> %d\n", fp->fd, + target_fd); /* TODO: fput? */ - fp->binder = 0; - fp->handle = target_fd; + fp->pad_binder = 0; + fp->fd = target_fd; } break; default: binder_user_error("%d:%d got transaction with invalid object type, %x\n", - proc->pid, thread->pid, fp->type); + proc->pid, thread->pid, hdr->type); return_error = BR_FAILED_REPLY; goto err_bad_object_type; } diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 41420e341e75..f67c2b1c0713 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -48,6 +48,14 @@ typedef __u64 binder_size_t; typedef __u64 binder_uintptr_t; #endif +/** + * struct binder_object_header - header shared by all binder metadata objects. + * @type: type of the object + */ +struct binder_object_header { + __u32 type; +}; + /* * This is the flattened representation of a Binder object for transfer * between processes. The 'offsets' supplied as part of a binder transaction @@ -56,9 +64,8 @@ typedef __u64 binder_uintptr_t; * between processes. */ struct flat_binder_object { - /* 8 bytes for large_flat_header. */ - __u32 type; - __u32 flags; + struct binder_object_header hdr; + __u32 flags; /* 8 bytes of data. */ union { @@ -70,6 +77,24 @@ struct flat_binder_object { binder_uintptr_t cookie; }; +/** + * struct binder_fd_object - describes a filedescriptor to be fixed up. + * @hdr: common header structure + * @pad_flags: padding to remain compatible with old userspace code + * @pad_binder: padding to remain compatible with old userspace code + * @fd: file descriptor + * @cookie: opaque data, used by user-space + */ +struct binder_fd_object { + struct binder_object_header hdr; + __u32 pad_flags; + union { + binder_uintptr_t pad_binder; + __u32 fd; + }; + + binder_uintptr_t cookie; +}; /* * On 64-bit platforms where user code may run in 32-bits the driver must * translate the buffer (and local binder) addresses appropriately. -- GitLab From 0b3311e1951303ade6a1550837ba060cf7cfc809 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 30 Sep 2016 15:51:48 +0200 Subject: [PATCH 1048/1442] ANDROID: binder: support multiple context managers. Move the context manager state into a separate struct context, and allow for each process to have its own context associated with it. Change-Id: Ifa934370241a2d447dd519eac3fd0682c6d00ab4 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 60 ++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 331d2abca9a2..ba766b8548a7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -56,8 +56,6 @@ static HLIST_HEAD(binder_dead_nodes); static struct dentry *binder_debugfs_dir_entry_root; static struct dentry *binder_debugfs_dir_entry_proc; -static struct binder_node *binder_context_mgr_node; -static kuid_t binder_context_mgr_uid = INVALID_UID; static int binder_last_id; #define BINDER_DEBUG_ENTRY(name) \ @@ -215,6 +213,15 @@ static struct binder_transaction_log_entry *binder_transaction_log_add( return e; } +struct binder_context { + struct binder_node *binder_context_mgr_node; + kuid_t binder_context_mgr_uid; +}; + +static struct binder_context global_context = { + .binder_context_mgr_uid = INVALID_UID, +}; + struct binder_work { struct list_head entry; enum { @@ -330,6 +337,7 @@ struct binder_proc { int ready_threads; long default_priority; struct dentry *debugfs_entry; + struct binder_context *context; }; enum { @@ -934,8 +942,10 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, if (internal) { if (target_list == NULL && node->internal_strong_refs == 0 && - !(node == binder_context_mgr_node && - node->has_strong_ref)) { + !(node->proc && + node == node->proc->context-> + binder_context_mgr_node && + node->has_strong_ref)) { pr_err("invalid inc strong node for %d\n", node->debug_id); return -EINVAL; @@ -1036,6 +1046,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, struct rb_node **p = &proc->refs_by_node.rb_node; struct rb_node *parent = NULL; struct binder_ref *ref, *new_ref; + struct binder_context *context = proc->context; while (*p) { parent = *p; @@ -1058,7 +1069,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc, rb_link_node(&new_ref->rb_node_node, parent, p); rb_insert_color(&new_ref->rb_node_node, &proc->refs_by_node); - new_ref->desc = (node == binder_context_mgr_node) ? 0 : 1; + new_ref->desc = (node == context->binder_context_mgr_node) ? 0 : 1; for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) { ref = rb_entry(n, struct binder_ref, rb_node_desc); if (ref->desc > new_ref->desc) @@ -1388,6 +1399,7 @@ static void binder_transaction(struct binder_proc *proc, struct binder_transaction *in_reply_to = NULL; struct binder_transaction_log_entry *e; uint32_t return_error; + struct binder_context *context = proc->context; e = binder_transaction_log_add(&binder_transaction_log); e->call_type = reply ? 2 : !!(tr->flags & TF_ONE_WAY); @@ -1448,7 +1460,7 @@ static void binder_transaction(struct binder_proc *proc, } target_node = ref->node; } else { - target_node = binder_context_mgr_node; + target_node = context->binder_context_mgr_node; if (target_node == NULL) { return_error = BR_DEAD_REPLY; goto err_no_context_mgr_node; @@ -1839,6 +1851,7 @@ static int binder_thread_write(struct binder_proc *proc, binder_size_t *consumed) { uint32_t cmd; + struct binder_context *context = proc->context; void __user *buffer = (void __user *)(uintptr_t)binder_buffer; void __user *ptr = buffer + *consumed; void __user *end = buffer + size; @@ -1865,10 +1878,10 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); - if (target == 0 && binder_context_mgr_node && + if (target == 0 && context->binder_context_mgr_node && (cmd == BC_INCREFS || cmd == BC_ACQUIRE)) { ref = binder_get_ref_for_node(proc, - binder_context_mgr_node); + context->binder_context_mgr_node); if (ref->desc != target) { binder_user_error("%d:%d tried to acquire reference to desc 0, got %d instead\n", proc->pid, thread->pid, @@ -2774,9 +2787,11 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) { int ret = 0; struct binder_proc *proc = filp->private_data; + struct binder_context *context = proc->context; + kuid_t curr_euid = current_euid(); - if (binder_context_mgr_node != NULL) { + if (context->binder_context_mgr_node) { pr_err("BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; goto out; @@ -2784,27 +2799,27 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) ret = security_binder_set_context_mgr(proc->tsk); if (ret < 0) goto out; - if (uid_valid(binder_context_mgr_uid)) { - if (!uid_eq(binder_context_mgr_uid, curr_euid)) { + if (uid_valid(context->binder_context_mgr_uid)) { + if (!uid_eq(context->binder_context_mgr_uid, curr_euid)) { pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n", from_kuid(&init_user_ns, curr_euid), from_kuid(&init_user_ns, - binder_context_mgr_uid)); + context->binder_context_mgr_uid)); ret = -EPERM; goto out; } } else { - binder_context_mgr_uid = curr_euid; + context->binder_context_mgr_uid = curr_euid; } - binder_context_mgr_node = binder_new_node(proc, 0, 0); - if (binder_context_mgr_node == NULL) { + context->binder_context_mgr_node = binder_new_node(proc, 0, 0); + if (!context->binder_context_mgr_node) { ret = -ENOMEM; goto out; } - binder_context_mgr_node->local_weak_refs++; - binder_context_mgr_node->local_strong_refs++; - binder_context_mgr_node->has_strong_ref = 1; - binder_context_mgr_node->has_weak_ref = 1; + context->binder_context_mgr_node->local_weak_refs++; + context->binder_context_mgr_node->local_strong_refs++; + context->binder_context_mgr_node->has_strong_ref = 1; + context->binder_context_mgr_node->has_weak_ref = 1; out: return ret; } @@ -3039,6 +3054,7 @@ static int binder_open(struct inode *nodp, struct file *filp) get_task_struct(current); proc->tsk = current; proc->vma_vm_mm = current->mm; + proc->context = &global_context; INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); proc->default_priority = task_nice(current); @@ -3151,6 +3167,7 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { struct binder_transaction *t; + struct binder_context *context = proc->context; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, buffers, active_transactions, page_count; @@ -3160,11 +3177,12 @@ static void binder_deferred_release(struct binder_proc *proc) hlist_del(&proc->proc_node); - if (binder_context_mgr_node && binder_context_mgr_node->proc == proc) { + if (context->binder_context_mgr_node && + context->binder_context_mgr_node->proc == proc) { binder_debug(BINDER_DEBUG_DEAD_BINDER, "%s: %d context_mgr_node gone\n", __func__, proc->pid); - binder_context_mgr_node = NULL; + context->binder_context_mgr_node = NULL; } threads = 0; -- GitLab From 63b9f3b7e26d511d5bbe37ab2b408122726e62b0 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Mon, 17 Oct 2016 15:17:31 +0200 Subject: [PATCH 1049/1442] ANDROID: binder: deal with contexts in debugfs. Properly print the context in debugfs entries. Change-Id: If10c2129536d9f39bae542afd7318ca79af60e3a Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ba766b8548a7..0b57866d61d3 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -189,6 +189,7 @@ struct binder_transaction_log_entry { int to_node; int data_size; int offsets_size; + const char *context_name; }; struct binder_transaction_log { int next; @@ -216,10 +217,12 @@ static struct binder_transaction_log_entry *binder_transaction_log_add( struct binder_context { struct binder_node *binder_context_mgr_node; kuid_t binder_context_mgr_uid; + const char *name; }; static struct binder_context global_context = { .binder_context_mgr_uid = INVALID_UID, + .name = "binder", }; struct binder_work { @@ -1408,6 +1411,7 @@ static void binder_transaction(struct binder_proc *proc, e->target_handle = tr->target.handle; e->data_size = tr->data_size; e->offsets_size = tr->offsets_size; + e->context_name = proc->context->name; if (reply) { in_reply_to = thread->transaction_stack; @@ -3073,8 +3077,17 @@ static int binder_open(struct inode *nodp, struct file *filp) char strbuf[11]; snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); + /* + * proc debug entries are shared between contexts, so + * this will fail if the process tries to open the driver + * again with a different context. The priting code will + * anyway print all contexts that a given PID has, so this + * is not a problem. + */ proc->debugfs_entry = debugfs_create_file(strbuf, S_IRUGO, - binder_debugfs_dir_entry_proc, proc, &binder_proc_fops); + binder_debugfs_dir_entry_proc, + (void *)(unsigned long)proc->pid, + &binder_proc_fops); } return 0; @@ -3469,6 +3482,7 @@ static void print_binder_proc(struct seq_file *m, size_t header_pos; seq_printf(m, "proc %d\n", proc->pid); + seq_printf(m, "context %s\n", proc->context->name); header_pos = m->count; for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) @@ -3593,6 +3607,7 @@ static void print_binder_proc_stats(struct seq_file *m, int count, strong, weak; seq_printf(m, "proc %d\n", proc->pid); + seq_printf(m, "context %s\n", proc->context->name); count = 0; for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) count++; @@ -3700,23 +3715,18 @@ static int binder_transactions_show(struct seq_file *m, void *unused) static int binder_proc_show(struct seq_file *m, void *unused) { struct binder_proc *itr; - struct binder_proc *proc = m->private; + int pid = (unsigned long)m->private; int do_lock = !binder_debug_no_lock; - bool valid_proc = false; if (do_lock) binder_lock(__func__); hlist_for_each_entry(itr, &binder_procs, proc_node) { - if (itr == proc) { - valid_proc = true; - break; + if (itr->pid == pid) { + seq_puts(m, "binder proc state:\n"); + print_binder_proc(m, itr, 1); } } - if (valid_proc) { - seq_puts(m, "binder proc state:\n"); - print_binder_proc(m, proc, 1); - } if (do_lock) binder_unlock(__func__); return 0; @@ -3726,11 +3736,11 @@ static void print_binder_transaction_log_entry(struct seq_file *m, struct binder_transaction_log_entry *e) { seq_printf(m, - "%d: %s from %d:%d to %d:%d node %d handle %d size %d:%d\n", + "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d\n", e->debug_id, (e->call_type == 2) ? "reply" : ((e->call_type == 1) ? "async" : "call "), e->from_proc, - e->from_thread, e->to_proc, e->to_thread, e->to_node, - e->target_handle, e->data_size, e->offsets_size); + e->from_thread, e->to_proc, e->to_thread, e->context_name, + e->to_node, e->target_handle, e->data_size, e->offsets_size); } static int binder_transaction_log_show(struct seq_file *m, void *unused) -- GitLab From 6b7c712f95ee473f08e828953bdb32be1daabfe3 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 30 Sep 2016 16:08:09 +0200 Subject: [PATCH 1050/1442] ANDROID: binder: support multiple /dev instances. Add a new module parameter 'devices', that can be used to specify the names of the binder device nodes we want to populate in /dev. Each device node has its own context manager, and is therefore logically separated from all the other device nodes. The config option CONFIG_ANDROID_BINDER_DEVICES can be used to set the default value of the parameter. This approach was favored over using IPC namespaces, mostly because we require a single process to be a part of multiple binder contexts, which seemed harder to achieve with namespaces. Change-Id: I3df72b2a19b5ad5a0360e6322482db7b00a12b24 Signed-off-by: Martijn Coenen --- drivers/android/Kconfig | 12 ++++++ drivers/android/binder.c | 83 ++++++++++++++++++++++++++++++++++------ 2 files changed, 84 insertions(+), 11 deletions(-) diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index bdfc6c6f4f5a..a82fc022d34b 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -19,6 +19,18 @@ config ANDROID_BINDER_IPC Android process, using Binder to identify, invoke and pass arguments between said processes. +config ANDROID_BINDER_DEVICES + string "Android Binder devices" + depends on ANDROID_BINDER_IPC + default "binder" + ---help--- + Default value for the binder.devices parameter. + + The binder.devices parameter is a comma-separated list of strings + that specifies the names of the binder device nodes that will be + created. Each binder device has its own context manager, and is + therefore logically separated from the other devices. + config ANDROID_BINDER_IPC_32BIT bool depends on !64BIT && ANDROID_BINDER_IPC diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0b57866d61d3..5e84ed05cad5 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -50,6 +50,7 @@ static DEFINE_MUTEX(binder_main_lock); static DEFINE_MUTEX(binder_deferred_lock); static DEFINE_MUTEX(binder_mmap_lock); +static HLIST_HEAD(binder_devices); static HLIST_HEAD(binder_procs); static HLIST_HEAD(binder_deferred_list); static HLIST_HEAD(binder_dead_nodes); @@ -113,6 +114,9 @@ module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); static bool binder_debug_no_lock; module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO); +static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; +module_param_named(devices, binder_devices_param, charp, S_IRUGO); + static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); static int binder_stop_on_user_error; @@ -220,9 +224,10 @@ struct binder_context { const char *name; }; -static struct binder_context global_context = { - .binder_context_mgr_uid = INVALID_UID, - .name = "binder", +struct binder_device { + struct hlist_node hlist; + struct miscdevice miscdev; + struct binder_context context; }; struct binder_work { @@ -3048,6 +3053,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) static int binder_open(struct inode *nodp, struct file *filp) { struct binder_proc *proc; + struct binder_device *binder_dev; binder_debug(BINDER_DEBUG_OPEN_CLOSE, "binder_open: %d:%d\n", current->group_leader->pid, current->pid); @@ -3058,10 +3064,12 @@ static int binder_open(struct inode *nodp, struct file *filp) get_task_struct(current); proc->tsk = current; proc->vma_vm_mm = current->mm; - proc->context = &global_context; INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); proc->default_priority = task_nice(current); + binder_dev = container_of(filp->private_data, struct binder_device, + miscdev); + proc->context = &binder_dev->context; binder_lock(__func__); @@ -3768,26 +3776,50 @@ static const struct file_operations binder_fops = { .release = binder_release, }; -static struct miscdevice binder_miscdev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "binder", - .fops = &binder_fops -}; - BINDER_DEBUG_ENTRY(state); BINDER_DEBUG_ENTRY(stats); BINDER_DEBUG_ENTRY(transactions); BINDER_DEBUG_ENTRY(transaction_log); +static int __init init_binder_device(const char *name) +{ + int ret; + struct binder_device *binder_device; + + binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL); + if (!binder_device) + return -ENOMEM; + + binder_device->miscdev.fops = &binder_fops; + binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; + binder_device->miscdev.name = name; + + binder_device->context.binder_context_mgr_uid = INVALID_UID; + binder_device->context.name = name; + + ret = misc_register(&binder_device->miscdev); + if (ret < 0) { + kfree(binder_device); + return ret; + } + + hlist_add_head(&binder_device->hlist, &binder_devices); + + return ret; +} + static int __init binder_init(void) { int ret; + char *device_name, *device_names; + struct binder_device *device; + struct hlist_node *tmp; binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); if (binder_debugfs_dir_entry_root) binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", binder_debugfs_dir_entry_root); - ret = misc_register(&binder_miscdev); + if (binder_debugfs_dir_entry_root) { debugfs_create_file("state", S_IRUGO, @@ -3815,6 +3847,35 @@ static int __init binder_init(void) &binder_transaction_log_failed, &binder_transaction_log_fops); } + + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } + strcpy(device_names, binder_devices_param); + + while ((device_name = strsep(&device_names, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; + } + + return ret; + +err_init_binder_device_failed: + hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) { + misc_deregister(&device->miscdev); + hlist_del(&device->hlist); + kfree(device); + } +err_alloc_device_names_failed: + debugfs_remove_recursive(binder_debugfs_dir_entry_root); + return ret; } -- GitLab From d82cb8b4f171885a3b277c1e9a9ae44e64506b66 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Thu, 29 Sep 2016 15:38:14 +0200 Subject: [PATCH 1051/1442] ANDROID: binder: refactor binder_transact() Moved handling of fixup for binder objects, handles and file descriptors into separate functions. Change-Id: If6849f1caee3834aa87d0ab08950bb1e21ec6e38 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 309 ++++++++++++++++++++++----------------- 1 file changed, 172 insertions(+), 137 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5e84ed05cad5..5a312af1ab45 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1391,10 +1391,172 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, } } +static int binder_translate_binder(struct flat_binder_object *fp, + struct binder_transaction *t, + struct binder_thread *thread) +{ + struct binder_node *node; + struct binder_ref *ref; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + + node = binder_get_node(proc, fp->binder); + if (!node) { + node = binder_new_node(proc, fp->binder, fp->cookie); + if (!node) + return -ENOMEM; + + node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); + } + if (fp->cookie != node->cookie) { + binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n", + proc->pid, thread->pid, (u64)fp->binder, + node->debug_id, (u64)fp->cookie, + (u64)node->cookie); + return -EINVAL; + } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) + return -EPERM; + + ref = binder_get_ref_for_node(target_proc, node); + if (!ref) + return -EINVAL; + + if (fp->hdr.type == BINDER_TYPE_BINDER) + fp->hdr.type = BINDER_TYPE_HANDLE; + else + fp->hdr.type = BINDER_TYPE_WEAK_HANDLE; + fp->binder = 0; + fp->handle = ref->desc; + fp->cookie = 0; + binder_inc_ref(ref, fp->hdr.type == BINDER_TYPE_HANDLE, &thread->todo); + + trace_binder_transaction_node_to_ref(t, node, ref); + binder_debug(BINDER_DEBUG_TRANSACTION, + " node %d u%016llx -> ref %d desc %d\n", + node->debug_id, (u64)node->ptr, + ref->debug_id, ref->desc); + + return 0; +} + +static int binder_translate_handle(struct flat_binder_object *fp, + struct binder_transaction *t, + struct binder_thread *thread) +{ + struct binder_ref *ref; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + + ref = binder_get_ref(proc, fp->handle, + fp->hdr.type == BINDER_TYPE_HANDLE); + if (!ref) { + binder_user_error("%d:%d got transaction with invalid handle, %d\n", + proc->pid, thread->pid, fp->handle); + return -EINVAL; + } + if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) + return -EPERM; + + if (ref->node->proc == target_proc) { + if (fp->hdr.type == BINDER_TYPE_HANDLE) + fp->hdr.type = BINDER_TYPE_BINDER; + else + fp->hdr.type = BINDER_TYPE_WEAK_BINDER; + fp->binder = ref->node->ptr; + fp->cookie = ref->node->cookie; + binder_inc_node(ref->node, fp->hdr.type == BINDER_TYPE_BINDER, + 0, NULL); + trace_binder_transaction_ref_to_node(t, ref); + binder_debug(BINDER_DEBUG_TRANSACTION, + " ref %d desc %d -> node %d u%016llx\n", + ref->debug_id, ref->desc, ref->node->debug_id, + (u64)ref->node->ptr); + } else { + struct binder_ref *new_ref; + + new_ref = binder_get_ref_for_node(target_proc, ref->node); + if (!new_ref) + return -EINVAL; + + fp->binder = 0; + fp->handle = new_ref->desc; + fp->cookie = 0; + binder_inc_ref(new_ref, fp->hdr.type == BINDER_TYPE_HANDLE, + NULL); + trace_binder_transaction_ref_to_ref(t, ref, new_ref); + binder_debug(BINDER_DEBUG_TRANSACTION, + " ref %d desc %d -> ref %d desc %d (node %d)\n", + ref->debug_id, ref->desc, new_ref->debug_id, + new_ref->desc, ref->node->debug_id); + } + return 0; +} + +static int binder_translate_fd(int fd, + struct binder_transaction *t, + struct binder_thread *thread, + struct binder_transaction *in_reply_to) +{ + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + int target_fd; + struct file *file; + int ret; + bool target_allows_fd; + + if (in_reply_to) + target_allows_fd = !!(in_reply_to->flags & TF_ACCEPT_FDS); + else + target_allows_fd = t->buffer->target_node->accept_fds; + if (!target_allows_fd) { + binder_user_error("%d:%d got %s with fd, %d, but target does not allow fds\n", + proc->pid, thread->pid, + in_reply_to ? "reply" : "transaction", + fd); + ret = -EPERM; + goto err_fd_not_accepted; + } + + file = fget(fd); + if (!file) { + binder_user_error("%d:%d got transaction with invalid fd, %d\n", + proc->pid, thread->pid, fd); + ret = -EBADF; + goto err_fget; + } + ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file); + if (ret < 0) { + ret = -EPERM; + goto err_security; + } + + target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); + if (target_fd < 0) { + ret = -ENOMEM; + goto err_get_unused_fd; + } + task_fd_install(target_proc, target_fd, file); + trace_binder_transaction_fd(t, fd, target_fd); + binder_debug(BINDER_DEBUG_TRANSACTION, " fd %d -> %d\n", + fd, target_fd); + + return target_fd; + +err_get_unused_fd: +err_security: + fput(file); +err_fget: +err_fd_not_accepted: + return ret; +} + static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, struct binder_transaction_data *tr, int reply) { + int ret; struct binder_transaction *t; struct binder_work *tcomplete; binder_size_t *offp, *off_end; @@ -1622,157 +1784,35 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { struct flat_binder_object *fp; - struct binder_node *node; - struct binder_ref *ref; fp = to_flat_binder_object(hdr); - node = binder_get_node(proc, fp->binder); - if (node == NULL) { - node = binder_new_node(proc, fp->binder, fp->cookie); - if (node == NULL) { - return_error = BR_FAILED_REPLY; - goto err_binder_new_node_failed; - } - node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; - node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); - } - if (fp->cookie != node->cookie) { - binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n", - proc->pid, thread->pid, - (u64)fp->binder, node->debug_id, - (u64)fp->cookie, (u64)node->cookie); + ret = binder_translate_binder(fp, t, thread); + if (ret < 0) { return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_for_node_failed; + goto err_translate_failed; } - if (security_binder_transfer_binder(proc->tsk, - target_proc->tsk)) { - return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_for_node_failed; - } - ref = binder_get_ref_for_node(target_proc, node); - if (ref == NULL) { - return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_for_node_failed; - } - if (hdr->type == BINDER_TYPE_BINDER) - hdr->type = BINDER_TYPE_HANDLE; - else - hdr->type = BINDER_TYPE_WEAK_HANDLE; - fp->binder = 0; - fp->handle = ref->desc; - fp->cookie = 0; - binder_inc_ref(ref, hdr->type == BINDER_TYPE_HANDLE, - &thread->todo); - - trace_binder_transaction_node_to_ref(t, node, ref); - binder_debug(BINDER_DEBUG_TRANSACTION, - " node %d u%016llx -> ref %d desc %d\n", - node->debug_id, (u64)node->ptr, - ref->debug_id, ref->desc); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { struct flat_binder_object *fp; - struct binder_ref *ref; fp = to_flat_binder_object(hdr); - ref = binder_get_ref(proc, fp->handle, - hdr->type == BINDER_TYPE_HANDLE); - if (ref == NULL) { - binder_user_error("%d:%d got transaction with invalid handle, %d\n", - proc->pid, - thread->pid, fp->handle); + ret = binder_translate_handle(fp, t, thread); + if (ret < 0) { return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_failed; - } - if (security_binder_transfer_binder(proc->tsk, - target_proc->tsk)) { - return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_failed; - } - if (ref->node->proc == target_proc) { - if (hdr->type == BINDER_TYPE_HANDLE) - hdr->type = BINDER_TYPE_BINDER; - else - hdr->type = BINDER_TYPE_WEAK_BINDER; - fp->binder = ref->node->ptr; - fp->cookie = ref->node->cookie; - binder_inc_node(ref->node, - hdr->type == BINDER_TYPE_BINDER, - 0, NULL); - trace_binder_transaction_ref_to_node(t, ref); - binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> node %d u%016llx\n", - ref->debug_id, ref->desc, ref->node->debug_id, - (u64)ref->node->ptr); - } else { - struct binder_ref *new_ref; - - new_ref = binder_get_ref_for_node(target_proc, ref->node); - if (new_ref == NULL) { - return_error = BR_FAILED_REPLY; - goto err_binder_get_ref_for_node_failed; - } - fp->binder = 0; - fp->handle = new_ref->desc; - fp->cookie = 0; - binder_inc_ref(new_ref, - hdr->type == BINDER_TYPE_HANDLE, - NULL); - trace_binder_transaction_ref_to_ref(t, ref, - new_ref); - binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> ref %d desc %d (node %d)\n", - ref->debug_id, ref->desc, new_ref->debug_id, - new_ref->desc, ref->node->debug_id); + goto err_translate_failed; } } break; case BINDER_TYPE_FD: { - int target_fd; - struct file *file; struct binder_fd_object *fp = to_binder_fd_object(hdr); + int target_fd = binder_translate_fd(fp->fd, t, thread, + in_reply_to); - if (reply) { - if (!(in_reply_to->flags & TF_ACCEPT_FDS)) { - binder_user_error("%d:%d got reply with fd, %d, but target does not allow fds\n", - proc->pid, thread->pid, fp->fd); - return_error = BR_FAILED_REPLY; - goto err_fd_not_allowed; - } - } else if (!target_node->accept_fds) { - binder_user_error("%d:%d got transaction with fd, %d, but target does not allow fds\n", - proc->pid, thread->pid, fp->fd); - return_error = BR_FAILED_REPLY; - goto err_fd_not_allowed; - } - - file = fget(fp->fd); - if (file == NULL) { - binder_user_error("%d:%d got transaction with invalid fd, %d\n", - proc->pid, thread->pid, fp->fd); - return_error = BR_FAILED_REPLY; - goto err_fget_failed; - } - if (security_binder_transfer_file(proc->tsk, - target_proc->tsk, - file) < 0) { - fput(file); - return_error = BR_FAILED_REPLY; - goto err_get_unused_fd_failed; - } - target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); if (target_fd < 0) { - fput(file); return_error = BR_FAILED_REPLY; - goto err_get_unused_fd_failed; + goto err_translate_failed; } - task_fd_install(target_proc, target_fd, file); - trace_binder_transaction_fd(t, fp->fd, target_fd); - binder_debug(BINDER_DEBUG_TRANSACTION, - " fd %d -> %d\n", fp->fd, - target_fd); - /* TODO: fput? */ fp->pad_binder = 0; fp->fd = target_fd; } break; @@ -1809,12 +1849,7 @@ static void binder_transaction(struct binder_proc *proc, wake_up_interruptible(target_wait); return; -err_get_unused_fd_failed: -err_fget_failed: -err_fd_not_allowed: -err_binder_get_ref_for_node_failed: -err_binder_get_ref_failed: -err_binder_new_node_failed: +err_translate_failed: err_bad_object_type: err_bad_offset: err_copy_data_failed: -- GitLab From 59878d7f3e2f3bf4b32775f128c5f85fc0bdfec2 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 30 Sep 2016 14:05:40 +0200 Subject: [PATCH 1052/1442] ANDROID: binder: add extra size to allocator. The binder_buffer allocator currently only allocates space for the data and offsets buffers of a Parcel. This change allows for requesting an additional chunk of data in the buffer, which can for example be used to hold additional meta-data about the transaction (eg a security context). Change-Id: I58ab9c383a2e1a3057aae6adaa596ce867f1b157 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 41 +++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5a312af1ab45..cbde27a9ddfc 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -302,6 +302,7 @@ struct binder_buffer { struct binder_node *target_node; size_t data_size; size_t offsets_size; + size_t extra_buffers_size; uint8_t data[0]; }; @@ -669,7 +670,9 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, size_t data_size, - size_t offsets_size, int is_async) + size_t offsets_size, + size_t extra_buffers_size, + int is_async) { struct rb_node *n = proc->free_buffers.rb_node; struct binder_buffer *buffer; @@ -677,7 +680,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, struct rb_node *best_fit = NULL; void *has_page_addr; void *end_page_addr; - size_t size; + size_t size, data_offsets_size; if (proc->vma == NULL) { pr_err("%d: binder_alloc_buf, no vma\n", @@ -685,15 +688,20 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, return NULL; } - size = ALIGN(data_size, sizeof(void *)) + + data_offsets_size = ALIGN(data_size, sizeof(void *)) + ALIGN(offsets_size, sizeof(void *)); - if (size < data_size || size < offsets_size) { + if (data_offsets_size < data_size || data_offsets_size < offsets_size) { binder_user_error("%d: got transaction with invalid size %zd-%zd\n", proc->pid, data_size, offsets_size); return NULL; } - + size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *)); + if (size < data_offsets_size || size < extra_buffers_size) { + binder_user_error("%d: got transaction with invalid extra_buffers_size %zd\n", + proc->pid, extra_buffers_size); + return NULL; + } if (is_async && proc->free_async_space < size + sizeof(struct binder_buffer)) { binder_debug(BINDER_DEBUG_BUFFER_ALLOC, @@ -762,6 +770,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, proc->pid, size, buffer); buffer->data_size = data_size; buffer->offsets_size = offsets_size; + buffer->extra_buffers_size = extra_buffers_size; buffer->async_transaction = is_async; if (is_async) { proc->free_async_space -= size + sizeof(struct binder_buffer); @@ -836,7 +845,8 @@ static void binder_free_buf(struct binder_proc *proc, buffer_size = binder_buffer_size(proc, buffer); size = ALIGN(buffer->data_size, sizeof(void *)) + - ALIGN(buffer->offsets_size, sizeof(void *)); + ALIGN(buffer->offsets_size, sizeof(void *)) + + ALIGN(buffer->extra_buffers_size, sizeof(void *)); binder_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: binder_free_buf %p size %zd buffer_size %zd\n", @@ -1554,7 +1564,8 @@ static int binder_translate_fd(int fd, static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, - struct binder_transaction_data *tr, int reply) + struct binder_transaction_data *tr, int reply, + binder_size_t extra_buffers_size) { int ret; struct binder_transaction *t; @@ -1698,20 +1709,22 @@ static void binder_transaction(struct binder_proc *proc, if (reply) binder_debug(BINDER_DEBUG_TRANSACTION, - "%d:%d BC_REPLY %d -> %d:%d, data %016llx-%016llx size %lld-%lld\n", + "%d:%d BC_REPLY %d -> %d:%d, data %016llx-%016llx size %lld-%lld-%lld\n", proc->pid, thread->pid, t->debug_id, target_proc->pid, target_thread->pid, (u64)tr->data.ptr.buffer, (u64)tr->data.ptr.offsets, - (u64)tr->data_size, (u64)tr->offsets_size); + (u64)tr->data_size, (u64)tr->offsets_size, + (u64)extra_buffers_size); else binder_debug(BINDER_DEBUG_TRANSACTION, - "%d:%d BC_TRANSACTION %d -> %d - node %d, data %016llx-%016llx size %lld-%lld\n", + "%d:%d BC_TRANSACTION %d -> %d - node %d, data %016llx-%016llx size %lld-%lld-%lld\n", proc->pid, thread->pid, t->debug_id, target_proc->pid, target_node->debug_id, (u64)tr->data.ptr.buffer, (u64)tr->data.ptr.offsets, - (u64)tr->data_size, (u64)tr->offsets_size); + (u64)tr->data_size, (u64)tr->offsets_size, + (u64)extra_buffers_size); if (!reply && !(tr->flags & TF_ONE_WAY)) t->from = thread; @@ -1727,7 +1740,8 @@ static void binder_transaction(struct binder_proc *proc, trace_binder_transaction(reply, t, target_node); t->buffer = binder_alloc_buf(target_proc, tr->data_size, - tr->offsets_size, !reply && (t->flags & TF_ONE_WAY)); + tr->offsets_size, extra_buffers_size, + !reply && (t->flags & TF_ONE_WAY)); if (t->buffer == NULL) { return_error = BR_FAILED_REPLY; goto err_binder_alloc_buf_failed; @@ -2077,7 +2091,8 @@ static int binder_thread_write(struct binder_proc *proc, if (copy_from_user(&tr, ptr, sizeof(tr))) return -EFAULT; ptr += sizeof(tr); - binder_transaction(proc, thread, &tr, cmd == BC_REPLY); + binder_transaction(proc, thread, &tr, + cmd == BC_REPLY, 0); break; } -- GitLab From 5a6da53295adaf570c4e54216d0d6186c8d46892 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 30 Sep 2016 14:10:07 +0200 Subject: [PATCH 1053/1442] ANDROID: binder: support for scatter-gather. Previously all data passed over binder needed to be serialized, with the exception of Binder objects and file descriptors. This patchs adds support for scatter-gathering raw memory buffers into a binder transaction, avoiding the need to first serialize them into a Parcel. To remain backwards compatibile with existing binder clients, it introduces two new command ioctls for this purpose - BC_TRANSACTION_SG and BC_REPLY_SG. These commands may only be used with the new binder_transaction_data_sg structure, which adds a field for the total size of the buffers we are scatter-gathering. Because memory buffers may contain pointers to other buffers, we allow callers to specify a parent buffer and an offset into it, to indicate this is a location pointing to the buffer that we are fixing up. The kernel will then take care of fixing up the pointer to that buffer as well. Change-Id: I02417f28cff14688f2e1d6fcb959438fd96566cc Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 244 ++++++++++++++++++++++++++-- include/uapi/linux/android/binder.h | 45 +++++ 2 files changed, 276 insertions(+), 13 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index cbde27a9ddfc..6cf874769388 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -152,6 +152,9 @@ module_param_call(stop_on_user_error, binder_set_stop_on_user_error, #define to_binder_fd_object(hdr) container_of(hdr, struct binder_fd_object, hdr) +#define to_binder_buffer_object(hdr) \ + container_of(hdr, struct binder_buffer_object, hdr) + enum binder_stat_types { BINDER_STAT_PROC, BINDER_STAT_THREAD, @@ -165,7 +168,7 @@ enum binder_stat_types { struct binder_stats { int br[_IOC_NR(BR_FAILED_REPLY) + 1]; - int bc[_IOC_NR(BC_DEAD_BINDER_DONE) + 1]; + int bc[_IOC_NR(BC_REPLY_SG) + 1]; int obj_created[BINDER_STAT_COUNT]; int obj_deleted[BINDER_STAT_COUNT]; }; @@ -1305,6 +1308,9 @@ static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset) case BINDER_TYPE_FD: object_size = sizeof(struct binder_fd_object); break; + case BINDER_TYPE_PTR: + object_size = sizeof(struct binder_buffer_object); + break; default: return 0; } @@ -1315,11 +1321,111 @@ static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset) return 0; } +/** + * binder_validate_ptr() - validates binder_buffer_object in a binder_buffer. + * @b: binder_buffer containing the object + * @index: index in offset array at which the binder_buffer_object is + * located + * @start: points to the start of the offset array + * @num_valid: the number of valid offsets in the offset array + * + * Return: If @index is within the valid range of the offset array + * described by @start and @num_valid, and if there's a valid + * binder_buffer_object at the offset found in index @index + * of the offset array, that object is returned. Otherwise, + * %NULL is returned. + * Note that the offset found in index @index itself is not + * verified; this function assumes that @num_valid elements + * from @start were previously verified to have valid offsets. + */ +static struct binder_buffer_object *binder_validate_ptr(struct binder_buffer *b, + binder_size_t index, + binder_size_t *start, + binder_size_t num_valid) +{ + struct binder_buffer_object *buffer_obj; + binder_size_t *offp; + + if (index >= num_valid) + return NULL; + + offp = start + index; + buffer_obj = (struct binder_buffer_object *)(b->data + *offp); + if (buffer_obj->hdr.type != BINDER_TYPE_PTR) + return NULL; + + return buffer_obj; +} + +/** + * binder_validate_fixup() - validates pointer/fd fixups happen in order. + * @b: transaction buffer + * @objects_start start of objects buffer + * @buffer: binder_buffer_object in which to fix up + * @offset: start offset in @buffer to fix up + * @last_obj: last binder_buffer_object that we fixed up in + * @last_min_offset: minimum fixup offset in @last_obj + * + * Return: %true if a fixup in buffer @buffer at offset @offset is + * allowed. + * + * For safety reasons, we only allow fixups inside a buffer to happen + * at increasing offsets; additionally, we only allow fixup on the last + * buffer object that was verified, or one of its parents. + * + * Example of what is allowed: + * + * A + * B (parent = A, offset = 0) + * C (parent = A, offset = 16) + * D (parent = C, offset = 0) + * E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset) + * + * Examples of what is not allowed: + * + * Decreasing offsets within the same parent: + * A + * C (parent = A, offset = 16) + * B (parent = A, offset = 0) // decreasing offset within A + * + * Referring to a parent that wasn't the last object or any of its parents: + * A + * B (parent = A, offset = 0) + * C (parent = A, offset = 0) + * C (parent = A, offset = 16) + * D (parent = B, offset = 0) // B is not A or any of A's parents + */ +static bool binder_validate_fixup(struct binder_buffer *b, + binder_size_t *objects_start, + struct binder_buffer_object *buffer, + binder_size_t fixup_offset, + struct binder_buffer_object *last_obj, + binder_size_t last_min_offset) +{ + if (!last_obj) { + /* Nothing to fix up in */ + return false; + } + + while (last_obj != buffer) { + /* + * Safe to retrieve the parent of last_obj, since it + * was already previously verified by the driver. + */ + if ((last_obj->flags & BINDER_BUFFER_FLAG_HAS_PARENT) == 0) + return false; + last_min_offset = last_obj->parent_offset + sizeof(uintptr_t); + last_obj = (struct binder_buffer_object *) + (b->data + *(objects_start + last_obj->parent)); + } + return (fixup_offset >= last_min_offset); +} + static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_buffer *buffer, binder_size_t *failed_at) { - binder_size_t *offp, *off_end; + binder_size_t *offp, *off_start, *off_end; int debug_id = buffer->debug_id; binder_debug(BINDER_DEBUG_TRANSACTION, @@ -1330,13 +1436,13 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (buffer->target_node) binder_dec_node(buffer->target_node, 1, 0); - offp = (binder_size_t *)(buffer->data + - ALIGN(buffer->data_size, sizeof(void *))); + off_start = (binder_size_t *)(buffer->data + + ALIGN(buffer->data_size, sizeof(void *))); if (failed_at) off_end = failed_at; else - off_end = (void *)offp + buffer->offsets_size; - for (; offp < off_end; offp++) { + off_end = (void *)off_start + buffer->offsets_size; + for (offp = off_start; offp < off_end; offp++) { struct binder_object_header *hdr; size_t object_size = binder_validate_object(buffer, *offp); @@ -1392,7 +1498,12 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (failed_at) task_close_fd(proc, fp->fd); } break; - + case BINDER_TYPE_PTR: + /* + * Nothing to do here, this will get cleaned up when the + * transaction buffer gets freed + */ + break; default: pr_err("transaction release %d bad object type %x\n", debug_id, hdr->type); @@ -1562,6 +1673,53 @@ static int binder_translate_fd(int fd, return ret; } +static int binder_fixup_parent(struct binder_transaction *t, + struct binder_thread *thread, + struct binder_buffer_object *bp, + binder_size_t *off_start, + binder_size_t num_valid, + struct binder_buffer_object *last_fixup_obj, + binder_size_t last_fixup_min_off) +{ + struct binder_buffer_object *parent; + u8 *parent_buffer; + struct binder_buffer *b = t->buffer; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + + if (!(bp->flags & BINDER_BUFFER_FLAG_HAS_PARENT)) + return 0; + + parent = binder_validate_ptr(b, bp->parent, off_start, num_valid); + if (!parent) { + binder_user_error("%d:%d got transaction with invalid parent offset or type\n", + proc->pid, thread->pid); + return -EINVAL; + } + + if (!binder_validate_fixup(b, off_start, + parent, bp->parent_offset, + last_fixup_obj, + last_fixup_min_off)) { + binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", + proc->pid, thread->pid); + return -EINVAL; + } + + if (parent->length < sizeof(binder_uintptr_t) || + bp->parent_offset > parent->length - sizeof(binder_uintptr_t)) { + /* No space for a pointer here! */ + binder_user_error("%d:%d got transaction with invalid parent offset\n", + proc->pid, thread->pid); + return -EINVAL; + } + parent_buffer = (u8 *)(parent->buffer - + target_proc->user_buffer_offset); + *(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer; + + return 0; +} + static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, struct binder_transaction_data *tr, int reply, @@ -1570,8 +1728,9 @@ static void binder_transaction(struct binder_proc *proc, int ret; struct binder_transaction *t; struct binder_work *tcomplete; - binder_size_t *offp, *off_end; + binder_size_t *offp, *off_end, *off_start; binder_size_t off_min; + u8 *sg_bufp, *sg_buf_end; struct binder_proc *target_proc; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; @@ -1580,6 +1739,8 @@ static void binder_transaction(struct binder_proc *proc, struct binder_transaction *in_reply_to = NULL; struct binder_transaction_log_entry *e; uint32_t return_error; + struct binder_buffer_object *last_fixup_obj = NULL; + binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; e = binder_transaction_log_add(&binder_transaction_log); @@ -1754,8 +1915,9 @@ static void binder_transaction(struct binder_proc *proc, if (target_node) binder_inc_node(target_node, 1, 0, NULL); - offp = (binder_size_t *)(t->buffer->data + - ALIGN(tr->data_size, sizeof(void *))); + off_start = (binder_size_t *)(t->buffer->data + + ALIGN(tr->data_size, sizeof(void *))); + offp = off_start; if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t) tr->data.ptr.buffer, tr->data_size)) { @@ -1777,7 +1939,16 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_bad_offset; } - off_end = (void *)offp + tr->offsets_size; + if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) { + binder_user_error("%d:%d got transaction with unaligned buffers size, %lld\n", + proc->pid, thread->pid, + extra_buffers_size); + return_error = BR_FAILED_REPLY; + goto err_bad_offset; + } + off_end = (void *)off_start + tr->offsets_size; + sg_bufp = (u8 *)(PTR_ALIGN(off_end, sizeof(void *))); + sg_buf_end = sg_bufp + extra_buffers_size; off_min = 0; for (; offp < off_end; offp++) { struct binder_object_header *hdr; @@ -1830,7 +2001,41 @@ static void binder_transaction(struct binder_proc *proc, fp->pad_binder = 0; fp->fd = target_fd; } break; - + case BINDER_TYPE_PTR: { + struct binder_buffer_object *bp = + to_binder_buffer_object(hdr); + size_t buf_left = sg_buf_end - sg_bufp; + + if (bp->length > buf_left) { + binder_user_error("%d:%d got transaction with too large buffer\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + goto err_bad_offset; + } + if (copy_from_user(sg_bufp, + (const void __user *)(uintptr_t) + bp->buffer, bp->length)) { + binder_user_error("%d:%d got transaction with invalid offsets ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + goto err_copy_data_failed; + } + /* Fixup buffer pointer to target proc address space */ + bp->buffer = (uintptr_t)sg_bufp + + target_proc->user_buffer_offset; + sg_bufp += ALIGN(bp->length, sizeof(u64)); + + ret = binder_fixup_parent(t, thread, bp, off_start, + offp - off_start, + last_fixup_obj, + last_fixup_min_off); + if (ret < 0) { + return_error = BR_FAILED_REPLY; + goto err_translate_failed; + } + last_fixup_obj = bp; + last_fixup_min_off = 0; + } break; default: binder_user_error("%d:%d got transaction with invalid object type, %x\n", proc->pid, thread->pid, hdr->type); @@ -2084,6 +2289,17 @@ static int binder_thread_write(struct binder_proc *proc, break; } + case BC_TRANSACTION_SG: + case BC_REPLY_SG: { + struct binder_transaction_data_sg tr; + + if (copy_from_user(&tr, ptr, sizeof(tr))) + return -EFAULT; + ptr += sizeof(tr); + binder_transaction(proc, thread, &tr.transaction_data, + cmd == BC_REPLY_SG, tr.buffers_size); + break; + } case BC_TRANSACTION: case BC_REPLY: { struct binder_transaction_data tr; @@ -3610,7 +3826,9 @@ static const char * const binder_command_strings[] = { "BC_EXIT_LOOPER", "BC_REQUEST_DEATH_NOTIFICATION", "BC_CLEAR_DEATH_NOTIFICATION", - "BC_DEAD_BINDER_DONE" + "BC_DEAD_BINDER_DONE", + "BC_TRANSACTION_SG", + "BC_REPLY_SG", }; static const char * const binder_objstat_strings[] = { diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index f67c2b1c0713..f3ef6e2634ba 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -33,6 +33,7 @@ enum { BINDER_TYPE_HANDLE = B_PACK_CHARS('s', 'h', '*', B_TYPE_LARGE), BINDER_TYPE_WEAK_HANDLE = B_PACK_CHARS('w', 'h', '*', B_TYPE_LARGE), BINDER_TYPE_FD = B_PACK_CHARS('f', 'd', '*', B_TYPE_LARGE), + BINDER_TYPE_PTR = B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE), }; enum { @@ -95,6 +96,39 @@ struct binder_fd_object { binder_uintptr_t cookie; }; + +/* struct binder_buffer_object - object describing a userspace buffer + * @hdr: common header structure + * @flags: one or more BINDER_BUFFER_* flags + * @buffer: address of the buffer + * @length: length of the buffer + * @parent: index in offset array pointing to parent buffer + * @parent_offset: offset in @parent pointing to this buffer + * + * A binder_buffer object represents an object that the + * binder kernel driver can copy verbatim to the target + * address space. A buffer itself may be pointed to from + * within another buffer, meaning that the pointer inside + * that other buffer needs to be fixed up as well. This + * can be done by setting the BINDER_BUFFER_FLAG_HAS_PARENT + * flag in @flags, by setting @parent buffer to the index + * in the offset array pointing to the parent binder_buffer_object, + * and by setting @parent_offset to the offset in the parent buffer + * at which the pointer to this buffer is located. + */ +struct binder_buffer_object { + struct binder_object_header hdr; + __u32 flags; + binder_uintptr_t buffer; + binder_size_t length; + binder_size_t parent; + binder_size_t parent_offset; +}; + +enum { + BINDER_BUFFER_FLAG_HAS_PARENT = 0x01, +}; + /* * On 64-bit platforms where user code may run in 32-bits the driver must * translate the buffer (and local binder) addresses appropriately. @@ -187,6 +221,11 @@ struct binder_transaction_data { } data; }; +struct binder_transaction_data_sg { + struct binder_transaction_data transaction_data; + binder_size_t buffers_size; +}; + struct binder_ptr_cookie { binder_uintptr_t ptr; binder_uintptr_t cookie; @@ -371,6 +410,12 @@ enum binder_driver_command_protocol { /* * void *: cookie */ + + BC_TRANSACTION_SG = _IOW('c', 17, struct binder_transaction_data_sg), + BC_REPLY_SG = _IOW('c', 18, struct binder_transaction_data_sg), + /* + * binder_transaction_data_sg: the sent command. + */ }; #endif /* _UAPI_LINUX_BINDER_H */ -- GitLab From e3e0f480f51a0d26156daf6a97256df75c21a780 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 18 Oct 2016 13:58:55 +0200 Subject: [PATCH 1054/1442] ANDROID: binder: support for file-descriptor arrays. This patch introduces a new binder_fd_array object, that allows us to support one or more file descriptors embedded in a buffer that is scatter-gathered. Change-Id: I647a53cf0d905c7be0dfd9333806982def68dd74 Signed-off-by: Martijn Coenen --- drivers/android/binder.c | 137 ++++++++++++++++++++++++++++ include/uapi/linux/android/binder.h | 28 ++++++ 2 files changed, 165 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6cf874769388..6d3f4eeadd28 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -155,6 +155,9 @@ module_param_call(stop_on_user_error, binder_set_stop_on_user_error, #define to_binder_buffer_object(hdr) \ container_of(hdr, struct binder_buffer_object, hdr) +#define to_binder_fd_array_object(hdr) \ + container_of(hdr, struct binder_fd_array_object, hdr) + enum binder_stat_types { BINDER_STAT_PROC, BINDER_STAT_THREAD, @@ -1311,6 +1314,9 @@ static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset) case BINDER_TYPE_PTR: object_size = sizeof(struct binder_buffer_object); break; + case BINDER_TYPE_FDA: + object_size = sizeof(struct binder_fd_array_object); + break; default: return 0; } @@ -1504,6 +1510,47 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, * transaction buffer gets freed */ break; + case BINDER_TYPE_FDA: { + struct binder_fd_array_object *fda; + struct binder_buffer_object *parent; + uintptr_t parent_buffer; + u32 *fd_array; + size_t fd_index; + binder_size_t fd_buf_size; + + fda = to_binder_fd_array_object(hdr); + parent = binder_validate_ptr(buffer, fda->parent, + off_start, + offp - off_start); + if (!parent) { + pr_err("transaction release %d bad parent offset", + debug_id); + continue; + } + /* + * Since the parent was already fixed up, convert it + * back to kernel address space to access it + */ + parent_buffer = parent->buffer - + proc->user_buffer_offset; + + fd_buf_size = sizeof(u32) * fda->num_fds; + if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { + pr_err("transaction release %d invalid number of fds (%lld)\n", + debug_id, (u64)fda->num_fds); + continue; + } + if (fd_buf_size > parent->length || + fda->parent_offset > parent->length - fd_buf_size) { + /* No space for all file descriptors here. */ + pr_err("transaction release %d not enough space for %lld fds in buffer\n", + debug_id, (u64)fda->num_fds); + continue; + } + fd_array = (u32 *)(parent_buffer + fda->parent_offset); + for (fd_index = 0; fd_index < fda->num_fds; fd_index++) + task_close_fd(proc, fd_array[fd_index]); + } break; default: pr_err("transaction release %d bad object type %x\n", debug_id, hdr->type); @@ -1673,6 +1720,63 @@ static int binder_translate_fd(int fd, return ret; } +static int binder_translate_fd_array(struct binder_fd_array_object *fda, + struct binder_buffer_object *parent, + struct binder_transaction *t, + struct binder_thread *thread, + struct binder_transaction *in_reply_to) +{ + binder_size_t fdi, fd_buf_size, num_installed_fds; + int target_fd; + uintptr_t parent_buffer; + u32 *fd_array; + struct binder_proc *proc = thread->proc; + struct binder_proc *target_proc = t->to_proc; + + fd_buf_size = sizeof(u32) * fda->num_fds; + if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { + binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", + proc->pid, thread->pid, (u64)fda->num_fds); + return -EINVAL; + } + if (fd_buf_size > parent->length || + fda->parent_offset > parent->length - fd_buf_size) { + /* No space for all file descriptors here. */ + binder_user_error("%d:%d not enough space to store %lld fds in buffer\n", + proc->pid, thread->pid, (u64)fda->num_fds); + return -EINVAL; + } + /* + * Since the parent was already fixed up, convert it + * back to the kernel address space to access it + */ + parent_buffer = parent->buffer - target_proc->user_buffer_offset; + fd_array = (u32 *)(parent_buffer + fda->parent_offset); + if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) { + binder_user_error("%d:%d parent offset not aligned correctly.\n", + proc->pid, thread->pid); + return -EINVAL; + } + for (fdi = 0; fdi < fda->num_fds; fdi++) { + target_fd = binder_translate_fd(fd_array[fdi], t, thread, + in_reply_to); + if (target_fd < 0) + goto err_translate_fd_failed; + fd_array[fdi] = target_fd; + } + return 0; + +err_translate_fd_failed: + /* + * Failed to allocate fd or security error, free fds + * installed so far. + */ + num_installed_fds = fdi; + for (fdi = 0; fdi < num_installed_fds; fdi++) + task_close_fd(target_proc, fd_array[fdi]); + return target_fd; +} + static int binder_fixup_parent(struct binder_transaction *t, struct binder_thread *thread, struct binder_buffer_object *bp, @@ -2001,6 +2105,38 @@ static void binder_transaction(struct binder_proc *proc, fp->pad_binder = 0; fp->fd = target_fd; } break; + case BINDER_TYPE_FDA: { + struct binder_fd_array_object *fda = + to_binder_fd_array_object(hdr); + struct binder_buffer_object *parent = + binder_validate_ptr(t->buffer, fda->parent, + off_start, + offp - off_start); + if (!parent) { + binder_user_error("%d:%d got transaction with invalid parent offset or type\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + goto err_bad_parent; + } + if (!binder_validate_fixup(t->buffer, off_start, + parent, fda->parent_offset, + last_fixup_obj, + last_fixup_min_off)) { + binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + goto err_bad_parent; + } + ret = binder_translate_fd_array(fda, parent, t, thread, + in_reply_to); + if (ret < 0) { + return_error = BR_FAILED_REPLY; + goto err_translate_failed; + } + last_fixup_obj = parent; + last_fixup_min_off = + fda->parent_offset + sizeof(u32) * fda->num_fds; + } break; case BINDER_TYPE_PTR: { struct binder_buffer_object *bp = to_binder_buffer_object(hdr); @@ -2071,6 +2207,7 @@ static void binder_transaction(struct binder_proc *proc, err_translate_failed: err_bad_object_type: err_bad_offset: +err_bad_parent: err_copy_data_failed: trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index f3ef6e2634ba..51f891fb1b18 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -33,6 +33,7 @@ enum { BINDER_TYPE_HANDLE = B_PACK_CHARS('s', 'h', '*', B_TYPE_LARGE), BINDER_TYPE_WEAK_HANDLE = B_PACK_CHARS('w', 'h', '*', B_TYPE_LARGE), BINDER_TYPE_FD = B_PACK_CHARS('f', 'd', '*', B_TYPE_LARGE), + BINDER_TYPE_FDA = B_PACK_CHARS('f', 'd', 'a', B_TYPE_LARGE), BINDER_TYPE_PTR = B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE), }; @@ -129,6 +130,33 @@ enum { BINDER_BUFFER_FLAG_HAS_PARENT = 0x01, }; +/* struct binder_fd_array_object - object describing an array of fds in a buffer + * @hdr: common header structure + * @num_fds: number of file descriptors in the buffer + * @parent: index in offset array to buffer holding the fd array + * @parent_offset: start offset of fd array in the buffer + * + * A binder_fd_array object represents an array of file + * descriptors embedded in a binder_buffer_object. It is + * different from a regular binder_buffer_object because it + * describes a list of file descriptors to fix up, not an opaque + * blob of memory, and hence the kernel needs to treat it differently. + * + * An example of how this would be used is with Android's + * native_handle_t object, which is a struct with a list of integers + * and a list of file descriptors. The native_handle_t struct itself + * will be represented by a struct binder_buffer_objct, whereas the + * embedded list of file descriptors is represented by a + * struct binder_fd_array_object with that binder_buffer_object as + * a parent. + */ +struct binder_fd_array_object { + struct binder_object_header hdr; + binder_size_t num_fds; + binder_size_t parent; + binder_size_t parent_offset; +}; + /* * On 64-bit platforms where user code may run in 32-bits the driver must * translate the buffer (and local binder) addresses appropriately. -- GitLab From 4de9e33e504682662725d175a26b40ecb79a5d64 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 15 Sep 2016 16:05:40 +0530 Subject: [PATCH 1055/1442] ANDROID: usb: gadget: audio_source: fix comparison of distinct pointer types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use div_s64() instead of do_div() to fix following "comparison of distinct pointer types lacks a cast" warning in do_div() call in audio_send() for ARCH=arm in Linux 4.8-rc6: CC drivers/usb/gadget/function/f_audio_source.o In file included from ./arch/arm/include/asm/div64.h:126:0, from ./include/linux/kernel.h:142, from ./include/linux/list.h:8, from ./include/linux/kobject.h:20, from ./include/linux/device.h:17, from drivers/usb/gadget/function/f_audio_source.c:17: drivers/usb/gadget/function/f_audio_source.c: In function ‘audio_send’: ./include/asm-generic/div64.h:207:28: warning: comparison of distinct pointer types lacks a cast (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \ ^ drivers/usb/gadget/function/f_audio_source.c:381:2: note: in expansion of macro ‘do_div’ do_div(msecs, 1000000); ^ ./include/asm-generic/div64.h:207:28: warning: comparison of distinct pointer types lacks a cast (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \ ^ drivers/usb/gadget/function/f_audio_source.c:383:2: note: in expansion of macro ‘do_div’ do_div(frames, 1000); ^ LD drivers/usb/gadget/function/usb_f_audio_source.o Change-Id: Ie1a920c8948f3fc3f1263add25a402ded132fd66 Signed-off-by: Amit Pundir --- drivers/usb/gadget/function/f_audio_source.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c index bcd817439dbf..2489a5fa2685 100644 --- a/drivers/usb/gadget/function/f_audio_source.c +++ b/drivers/usb/gadget/function/f_audio_source.c @@ -377,10 +377,9 @@ static void audio_send(struct audio_dev *audio) /* compute number of frames to send */ now = ktime_get(); - msecs = ktime_to_ns(now) - ktime_to_ns(audio->start_time); - do_div(msecs, 1000000); - frames = msecs * SAMPLE_RATE; - do_div(frames, 1000); + msecs = div_s64((ktime_to_ns(now) - ktime_to_ns(audio->start_time)), + 1000000); + frames = div_s64((msecs * SAMPLE_RATE), 1000); /* Readjust our frames_sent if we fall too far behind. * If we get too far behind it is better to drop some frames than -- GitLab From f8e6ad2f1fce3d55acd97aa0522ad42c9496ad5a Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 11 Aug 2015 12:34:45 +0530 Subject: [PATCH 1056/1442] ANDROID: usb: gadget: f_mtp: simplify ptp NULL pointer check Simplify MTP/PTP dev NULL pointer check introduced in Change-Id: Ic44a699d96df2e13467fc081bff88b97dcc5afb2 and restrict it to MTP/PTP function level only. Return ERR_PTR() instead of NULL from mtp_ptp function to skip doing NULL pointer checks all the way up to configfs.c Fixes: Change-Id: Ic44a699d96df2e13467fc081bff88b97dcc5afb2 ("usb: gadget: fix NULL ptr derefer while symlinking PTP func") Change-Id: Iab7c55089c115550c3506f6cca960a07ae52713d Signed-off-by: Amit Pundir --- drivers/usb/gadget/configfs.c | 5 ----- drivers/usb/gadget/function/f_mtp.c | 2 +- drivers/usb/gadget/functions.c | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 44746a4fff1d..7461e644a8a9 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -426,11 +426,6 @@ static int config_usb_cfg_link( } f = usb_get_function(fi); - if (f == NULL) { - /* Are we trying to symlink PTP without MTP function? */ - ret = -EINVAL; /* Invalid Configuration */ - goto out; - } if (IS_ERR(f)) { ret = PTR_ERR(f); goto out; diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index e21d3e05a4af..cca19cfa44b9 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -1497,7 +1497,7 @@ struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi, pr_err("\t2: Create MTP function\n"); pr_err("\t3: Create and symlink PTP function" " with a gadget configuration\n"); - return NULL; + return ERR_PTR(-EINVAL); /* Invalid Configuration */ } dev = fi_mtp->dev; diff --git a/drivers/usb/gadget/functions.c b/drivers/usb/gadget/functions.c index 389c1f3d0fee..b13f839e7368 100644 --- a/drivers/usb/gadget/functions.c +++ b/drivers/usb/gadget/functions.c @@ -58,7 +58,7 @@ struct usb_function *usb_get_function(struct usb_function_instance *fi) struct usb_function *f; f = fi->fd->alloc_func(fi); - if ((f == NULL) || IS_ERR(f)) + if (IS_ERR(f)) return f; f->fi = fi; return f; -- GitLab From 051584e76d124163baa8b0ce78a39d8a29c64452 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Fri, 11 Nov 2016 01:10:04 -0500 Subject: [PATCH 1057/1442] ANDROID: usb: gadget: function: cleanup: Add blank line after declaration Fix warning generated by checkpatch.pl: Missing a blank line after declarations Change-Id: Id129bb8cc8fa37c67a647e2e5996bb2817020e65 Signed-off-by: Anson Jacob --- drivers/usb/gadget/function/f_accessory.c | 2 ++ drivers/usb/gadget/function/f_audio_source.c | 1 + drivers/usb/gadget/function/f_mtp.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index 2ca16a577542..9d3ec0e37475 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -211,6 +211,7 @@ static inline struct acc_dev *func_to_dev(struct usb_function *f) static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; @@ -1021,6 +1022,7 @@ acc_function_unbind(struct usb_configuration *c, struct usb_function *f) static void acc_start_work(struct work_struct *data) { char *envp[2] = { "ACCESSORY=START", NULL }; + kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp); } diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c index 2489a5fa2685..db7903d19c43 100644 --- a/drivers/usb/gadget/function/f_audio_source.c +++ b/drivers/usb/gadget/function/f_audio_source.c @@ -310,6 +310,7 @@ static struct device_attribute *audio_source_function_attributes[] = { static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c index cca19cfa44b9..5cdee7bc5d38 100644 --- a/drivers/usb/gadget/function/f_mtp.c +++ b/drivers/usb/gadget/function/f_mtp.c @@ -361,6 +361,7 @@ static inline struct mtp_dev *func_to_mtp(struct usb_function *f) static struct usb_request *mtp_request_new(struct usb_ep *ep, int buffer_size) { struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL); + if (!req) return NULL; @@ -1121,6 +1122,7 @@ static int mtp_ctrlrequest(struct usb_composite_dev *cdev, } else if (ctrl->bRequest == MTP_REQ_GET_DEVICE_STATUS && w_index == 0 && w_value == 0) { struct mtp_device_status *status = cdev->req->buf; + status->wLength = __constant_cpu_to_le16(sizeof(*status)); @@ -1143,6 +1145,7 @@ static int mtp_ctrlrequest(struct usb_composite_dev *cdev, /* respond with data transfer or status phase? */ if (value >= 0) { int rc; + cdev->req->zero = value < w_length; cdev->req->length = value; rc = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC); @@ -1378,6 +1381,7 @@ static struct mtp_instance *to_mtp_instance(struct config_item *item) static void mtp_attr_release(struct config_item *item) { struct mtp_instance *fi_mtp = to_mtp_instance(item); + usb_put_function_instance(&fi_mtp->func_inst); } -- GitLab From 8ba4443660dc9760b3fb229092c69bba200fb659 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 28 Oct 2013 15:33:33 -0700 Subject: [PATCH 1058/1442] ANDROID: video: goldfishfb: add devicetree bindings Change-Id: I5f4ba861b981edf39af537001f8ac72202927031 Signed-off-by: Greg Hackmann --- drivers/video/fbdev/goldfishfb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 7f6c9e6cfc6c..f0e651bbbd61 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -304,12 +304,19 @@ static int goldfish_fb_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id goldfish_fb_of_match[] = { + { .compatible = "google,goldfish-fb", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_fb_of_match); static struct platform_driver goldfish_fb_driver = { .probe = goldfish_fb_probe, .remove = goldfish_fb_remove, .driver = { - .name = "goldfish_fb" + .name = "goldfish_fb", + .owner = THIS_MODULE, + .of_match_table = goldfish_fb_of_match, } }; -- GitLab From f85111e72da98ad3d8d4da51440dd74eadcb1167 Mon Sep 17 00:00:00 2001 From: Yu Ning Date: Thu, 12 Feb 2015 11:44:40 +0800 Subject: [PATCH 1059/1442] ANDROID: goldfish: Enable ACPI-based enumeration for goldfish framebuffer Follow the same way in which ACPI was enabled for goldfish battery. See commit d3be10e for details. Note that this patch also depends on commit af33cac. Change-Id: Ic63b6e7e0a4b9896ef9a9d0ed135a7796a4c1fdb Signed-off-by: Yu Ning --- drivers/video/fbdev/goldfishfb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index f0e651bbbd61..58b33e4af16e 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -26,6 +26,7 @@ #include #include #include +#include enum { FB_GET_WIDTH = 0x00, @@ -310,6 +311,12 @@ static const struct of_device_id goldfish_fb_of_match[] = { }; MODULE_DEVICE_TABLE(of, goldfish_fb_of_match); +static const struct acpi_device_id goldfish_fb_acpi_match[] = { + { "GFSH0004", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_fb_acpi_match); + static struct platform_driver goldfish_fb_driver = { .probe = goldfish_fb_probe, .remove = goldfish_fb_remove, @@ -317,6 +324,7 @@ static struct platform_driver goldfish_fb_driver = { .name = "goldfish_fb", .owner = THIS_MODULE, .of_match_table = goldfish_fb_of_match, + .acpi_match_table = ACPI_PTR(goldfish_fb_acpi_match), } }; -- GitLab From 6ab8758442864278c64b67ae307a81c35d0c9904 Mon Sep 17 00:00:00 2001 From: Yu Ning Date: Tue, 31 Mar 2015 14:41:48 +0800 Subject: [PATCH 1060/1442] ANDROID: goldfish: Enable ACPI-based enumeration for goldfish audio Follow the same way in which ACPI was enabled for goldfish battery. See commit d3be10e for details. Change-Id: I6ffe38ebc80fb8af8322152370b9d1fd227eaf50 Signed-off-by: Yu Ning --- drivers/staging/goldfish/goldfish_audio.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c index bd559956f199..5e93b57a0645 100644 --- a/drivers/staging/goldfish/goldfish_audio.c +++ b/drivers/staging/goldfish/goldfish_audio.c @@ -28,6 +28,7 @@ #include #include #include +#include MODULE_AUTHOR("Google, Inc."); MODULE_DESCRIPTION("Android QEMU Audio Driver"); @@ -351,12 +352,19 @@ static const struct of_device_id goldfish_audio_of_match[] = { }; MODULE_DEVICE_TABLE(of, goldfish_audio_of_match); +static const struct acpi_device_id goldfish_audio_acpi_match[] = { + { "GFSH0005", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_audio_acpi_match); + static struct platform_driver goldfish_audio_driver = { .probe = goldfish_audio_probe, .remove = goldfish_audio_remove, .driver = { .name = "goldfish_audio", .of_match_table = goldfish_audio_of_match, + .acpi_match_table = ACPI_PTR(goldfish_audio_acpi_match), } }; -- GitLab From 60bfe37d35e8e3ccd934593229d7930b42978343 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 19 Jun 2014 16:24:04 +0200 Subject: [PATCH 1061/1442] ANDROID: goldfish_fb: Set pixclock = 0 User space Android code identifies pixclock == 0 as a sign for emulation and will set the frame rate to 60 fps when reading this value, which is the desired outcome. Change-Id: I759bf518bf6683446bc786bf1be3cafa02dd8d42 Signed-off-by: Christoffer Dall Signed-off-by: Peter Maydell --- drivers/video/fbdev/goldfishfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 58b33e4af16e..131fee0341c3 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -235,7 +235,7 @@ static int goldfish_fb_probe(struct platform_device *pdev) fb->fb.var.activate = FB_ACTIVATE_NOW; fb->fb.var.height = readl(fb->reg_base + FB_GET_PHYS_HEIGHT); fb->fb.var.width = readl(fb->reg_base + FB_GET_PHYS_WIDTH); - fb->fb.var.pixclock = 10000; + fb->fb.var.pixclock = 0; fb->fb.var.red.offset = 11; fb->fb.var.red.length = 5; -- GitLab From 832579317d8c2b92c47eff2176b134fc63e5c572 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Fri, 18 Dec 2015 12:04:43 -0800 Subject: [PATCH 1062/1442] ANDROID: goldfish_events: no extra EV_SYN; register goldfish If we send SYN_REPORT on every single multitouch event, it breaks the multitouch. The multitouch becomes janky and having to click 2-3 times to do stuff (plus randomly activating notification bars when not clicking) If we suppress these SYN_REPORTS, multitouch will work fine, plus the events will have a protocol that looks nice. In addition, we need to register Goldfish Events as a multitouch device by issuing input_mt_init_slots, otherwise input_handle_abs_event in drivers/input/input.c will silently drop all ABS_MT_SLOT events, making it so that touches with more than 1 finger do not work properly. Signed-off-by: "Lingfeng Yang" Change-Id: Ib2350f7d1732449d246f6f0d9b7b08f02cc7c2dd (cherry picked from commit 6cf40d0a16330e1ef42bdf07d9aba6c16ee11fbc) --- drivers/input/keyboard/goldfish_events.c | 28 +++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c index f6e643b589b6..c877e56a9bd5 100644 --- a/drivers/input/keyboard/goldfish_events.c +++ b/drivers/input/keyboard/goldfish_events.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,8 @@ #include #include +#define GOLDFISH_MAX_FINGERS 5 + enum { REG_READ = 0x00, REG_SET_PAGE = 0x00, @@ -52,7 +55,21 @@ static irqreturn_t events_interrupt(int irq, void *dev_id) value = __raw_readl(edev->addr + REG_READ); input_event(edev->input, type, code, value); - input_sync(edev->input); + // Send an extra (EV_SYN, SYN_REPORT, 0x0) event + // if a key was pressed. Some keyboard device + // drivers may only send the EV_KEY event and + // not EV_SYN. + // Note that sending an extra SYN_REPORT is not + // necessary nor correct protocol with other + // devices such as touchscreens, which will send + // their own SYN_REPORT's when sufficient event + // information has been collected (e.g., for + // touchscreens, when pressure and X/Y coordinates + // have been received). Hence, we will only send + // this extra SYN_REPORT if type == EV_KEY. + if (type == EV_KEY) { + input_sync(edev->input); + } return IRQ_HANDLED; } @@ -154,6 +171,15 @@ static int events_probe(struct platform_device *pdev) input_dev->name = edev->name; input_dev->id.bustype = BUS_HOST; + // Set the Goldfish Device to be multi-touch. + // In the Ranchu kernel, there is multi-touch-specific + // code for handling ABS_MT_SLOT events. + // See drivers/input/input.c:input_handle_abs_event. + // If we do not issue input_mt_init_slots, + // the kernel will filter out needed ABS_MT_SLOT + // events when we touch the screen in more than one place, + // preventing multi-touch with more than one finger from working. + input_mt_init_slots(input_dev, GOLDFISH_MAX_FINGERS, 0); events_import_bits(edev, input_dev->evbit, EV_SYN, EV_MAX); events_import_bits(edev, input_dev->keybit, EV_KEY, KEY_MAX); -- GitLab From ebb099e1ba529dfa6e85d400b33fd23fa89933d8 Mon Sep 17 00:00:00 2001 From: Joshua Lang Date: Fri, 17 Jun 2016 17:30:55 -0700 Subject: [PATCH 1063/1442] ANDROID: goldfish_audio: Clear audio read buffer status after each read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer_status field is interrupt updated. After every read request, the buffer_status read field should be reset so that on the next loop iteration we don't read a stale value and read data before the device is ready. Signed-off-by: “Joshua Lang” Change-Id: I4943d5aaada1cad9c7e59a94a87c387578dabe86 --- drivers/staging/goldfish/goldfish_audio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c index 5e93b57a0645..0bb0ee2e691f 100644 --- a/drivers/staging/goldfish/goldfish_audio.c +++ b/drivers/staging/goldfish/goldfish_audio.c @@ -117,6 +117,7 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf, size_t count, loff_t *pos) { struct goldfish_audio *data = fp->private_data; + unsigned long irq_flags; int length; int result = 0; @@ -130,6 +131,10 @@ static ssize_t goldfish_audio_read(struct file *fp, char __user *buf, wait_event_interruptible(data->wait, data->buffer_status & AUDIO_INT_READ_BUFFER_FULL); + spin_lock_irqsave(&data->lock, irq_flags); + data->buffer_status &= ~AUDIO_INT_READ_BUFFER_FULL; + spin_unlock_irqrestore(&data->lock, irq_flags); + length = AUDIO_READ(data, AUDIO_READ_BUFFER_AVAILABLE); /* copy data to user space */ -- GitLab From 05f0d2a5985bf4e1036ab66a32d9a94c104ee8d3 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 7 Oct 2016 16:20:47 -0700 Subject: [PATCH 1064/1442] ANDROID: goldfish: add ranchu defconfigs Change-Id: I73ef1b132b6203ae921a1e1d4850eaadf58f8926 --- arch/arm/configs/ranchu_defconfig | 315 +++++++++++++++++ arch/arm64/configs/ranchu_defconfig | 311 +++++++++++++++++ arch/x86/configs/i386_ranchu_defconfig | 422 +++++++++++++++++++++++ arch/x86/configs/x86_64_ranchu_defconfig | 417 ++++++++++++++++++++++ 4 files changed, 1465 insertions(+) create mode 100644 arch/arm/configs/ranchu_defconfig create mode 100644 arch/arm64/configs/ranchu_defconfig create mode 100644 arch/x86/configs/i386_ranchu_defconfig create mode 100644 arch/x86/configs/x86_64_ranchu_defconfig diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig new file mode 100644 index 000000000000..35a90af941a4 --- /dev/null +++ b/arch/arm/configs/ranchu_defconfig @@ -0,0 +1,315 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_ARCH_MMAP_RND_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARCH_VIRT=y +CONFIG_ARM_KERNMEM_PERMS=y +CONFIG_SMP=y +CONFIG_PREEMPT=y +CONFIG_AEABI=y +CONFIG_HIGHMEM=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_CMDLINE="console=ttyAMA0" +CONFIG_VFP=y +CONFIG_NEON=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMSC911X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_AMBAKMI=y +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_VIRTUALIZATION=y diff --git a/arch/arm64/configs/ranchu_defconfig b/arch/arm64/configs/ranchu_defconfig new file mode 100644 index 000000000000..00eb346e0928 --- /dev/null +++ b/arch/arm64/configs/ranchu_defconfig @@ -0,0 +1,311 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_ARCH_MMAP_RND_BITS=24 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_VEXPRESS=y +CONFIG_NR_CPUS=4 +CONFIG_PREEMPT=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_SWP_EMULATION=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_SETEND_EMULATION=y +CONFIG_CMDLINE="console=ttyAMA0" +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_RPFILTER=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_TARGET_ECN=y +CONFIG_IP_NF_TARGET_TTL=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_AH=y +CONFIG_IP6_NF_MATCH_EUI64=y +CONFIG_IP6_NF_MATCH_FRAG=y +CONFIG_IP6_NF_MATCH_OPTS=y +CONFIG_IP6_NF_MATCH_HL=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_MATCH_MH=y +CONFIG_IP6_NF_MATCH_RT=y +CONFIG_IP6_NF_TARGET_HL=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMC91X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +CONFIG_BATTERY_GOLDFISH=y +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +# CONFIG_USB_SUPPORT is not set +CONFIG_RTC_CLASS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT2_FS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_FTRACE is not set +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DEBUG_RODATA=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig new file mode 100644 index 000000000000..b0e4e0ed4b11 --- /dev/null +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -0,0 +1,422 @@ +# CONFIG_64BIT is not set +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ARCH_MMAP_RND_BITS=16 +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_SMP=y +CONFIG_X86_BIGSMP=y +CONFIG_MCORE2=y +CONFIG_X86_GENERIC=y +CONFIG_HPET_TIMER=y +CONFIG_NR_CPUS=512 +CONFIG_PREEMPT=y +# CONFIG_X86_MCE is not set +CONFIG_X86_REBOOTFIXUPS=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_CMA=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_100=y +CONFIG_PHYSICAL_START=0x100000 +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=16 +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_NVRAM=y +CONFIG_I2C_I801=y +CONFIG_BATTERY_GOLDFISH=y +CONFIG_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +CONFIG_FB_GOLDFISH=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_SND_HDA_INTEL=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_FUSE_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_AES_586=y +CONFIG_CRYPTO_TWOFISH=y +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_PKCS7_TEST_KEY=y +# CONFIG_VIRTUALIZATION is not set +CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig new file mode 100644 index 000000000000..8dae21ed3ede --- /dev/null +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -0,0 +1,417 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_ARCH_MMAP_RND_BITS=32 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_SMP=y +CONFIG_MCORE2=y +CONFIG_MAXSMP=y +CONFIG_PREEMPT=y +# CONFIG_X86_MCE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_CMA=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_HZ_100=y +CONFIG_PHYSICAL_START=0x100000 +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_IA32_EMULATION=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DMA_CMA=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_NVRAM=y +CONFIG_I2C_I801=y +CONFIG_BATTERY_GOLDFISH=y +CONFIG_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +CONFIG_FB_GOLDFISH=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_VIRTIO_PCI=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_SND_HDA_INTEL=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_FUSE_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_CRYPTO_TWOFISH=y +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_PKCS7_TEST_KEY=y +# CONFIG_VIRTUALIZATION is not set +CONFIG_CRC_T10DIF=y -- GitLab From a50f1dbcef02adc7a8e0fbb18c9ac475f82102e3 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Mon, 13 Jun 2016 09:24:07 -0700 Subject: [PATCH 1065/1442] ANDROID: goldfish: Add goldfish sync driver This is kernel driver for controlling the Goldfish sync device on the host. It is used to maintain ordering in critical OpenGL state changes while using GPU emulation. The guest open()'s the Goldfish sync device to create a context for possibly maintaining sync timeline and fences. There is a 1:1 correspondence between such sync contexts and OpenGL contexts in the guest that need synchronization (which in turn, is anything involving swapping buffers, SurfaceFlinger, or Hardware Composer). The ioctl QUEUE_WORK takes a handle to a sync object and attempts to tell the host GPU to wait on the sync object and deal with signaling it. It possibly outputs a fence FD on which the Android systems that use them (GLConsumer, SurfaceFlinger, anything employing EGL_ANDROID_native_fence_sync) can use to wait. Design decisions and work log: - New approach is to have the guest issue ioctls that trigger host wait, and then host increments timeline. - We need the host's sync object handle and sync thread handle as the necessary information for that. - ioctl() from guest can work simultaneously with the interrupt handling for commands from host. - optimization: don't write back on timeline inc - Change spin lock design to be much more lightweight; do not call sw_sync functions or loop too long anywhere. - Send read/write commands in batches to minimize guest/host transitions. - robustness: BUG if we will overrun the cmd buffer. - robustness: return fd -1 if we cannot get an unused fd. - correctness: remove global mutex - cleanup pass done, incl. but not limited to: - removal of clear_upto and - switching to devm_*** This is part of a sequential, multi-CL change: external/qemu: https://android-review.googlesource.com/239442 <- host-side device's host interface https://android-review.googlesource.com/221593 https://android-review.googlesource.com/248563 https://android-review.googlesource.com/248564 https://android-review.googlesource.com/223032 external/qemu-android: https://android-review.googlesource.com/238790 <- host-side device implementation kernel/goldfish: https://android-review.googlesource.com/232631 <- needed https://android-review.googlesource.com/238399 <- this CL Also squash following bug fixes from android-goldfish-3.18 branch. b44d486 goldfish_sync: provide a signal to detect reboot ad1f597 goldfish_sync: fix stalls by avoiding early kfree() de208e8 [goldfish-sync] Fix possible race between kernel and user space Change-Id: I22f8a0e824717a7e751b1b0e1b461455501502b6 --- arch/x86/configs/i386_ranchu_defconfig | 1 + arch/x86/configs/x86_64_ranchu_defconfig | 1 + drivers/staging/goldfish/Kconfig | 6 + drivers/staging/goldfish/Makefile | 5 + drivers/staging/goldfish/goldfish_sync.c | 987 +++++++++++++++++++++++ 5 files changed, 1000 insertions(+) create mode 100644 drivers/staging/goldfish/goldfish_sync.c diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig index b0e4e0ed4b11..0206eb8cfb61 100644 --- a/arch/x86/configs/i386_ranchu_defconfig +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -363,6 +363,7 @@ CONFIG_SYNC=y CONFIG_SW_SYNC=y CONFIG_ION=y CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH_SYNC=y CONFIG_SND_HDA_INTEL=y CONFIG_GOLDFISH=y CONFIG_GOLDFISH_PIPE=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig index 8dae21ed3ede..dd389774bacb 100644 --- a/arch/x86/configs/x86_64_ranchu_defconfig +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -360,6 +360,7 @@ CONFIG_SYNC=y CONFIG_SW_SYNC=y CONFIG_ION=y CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH_SYNC=y CONFIG_SND_HDA_INTEL=y CONFIG_GOLDFISH=y CONFIG_GOLDFISH_PIPE=y diff --git a/drivers/staging/goldfish/Kconfig b/drivers/staging/goldfish/Kconfig index 4e094602437c..c579141a7bed 100644 --- a/drivers/staging/goldfish/Kconfig +++ b/drivers/staging/goldfish/Kconfig @@ -4,6 +4,12 @@ config GOLDFISH_AUDIO ---help--- Emulated audio channel for the Goldfish Android Virtual Device +config GOLDFISH_SYNC + tristate "Goldfish AVD Sync Driver" + depends on GOLDFISH + ---help--- + Emulated sync fences for the Goldfish Android Virtual Device + config MTD_GOLDFISH_NAND tristate "Goldfish NAND device" depends on GOLDFISH diff --git a/drivers/staging/goldfish/Makefile b/drivers/staging/goldfish/Makefile index dec34ad58162..0cf525588210 100644 --- a/drivers/staging/goldfish/Makefile +++ b/drivers/staging/goldfish/Makefile @@ -4,3 +4,8 @@ obj-$(CONFIG_GOLDFISH_AUDIO) += goldfish_audio.o obj-$(CONFIG_MTD_GOLDFISH_NAND) += goldfish_nand.o + +# and sync + +ccflags-y := -Idrivers/staging/android +obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync.o diff --git a/drivers/staging/goldfish/goldfish_sync.c b/drivers/staging/goldfish/goldfish_sync.c new file mode 100644 index 000000000000..ba8def29901e --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync.c @@ -0,0 +1,987 @@ +/* + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "sw_sync.h" +#include "sync.h" + +#define ERR(...) printk(KERN_ERR __VA_ARGS__); + +#define INFO(...) printk(KERN_INFO __VA_ARGS__); + +#define DPRINT(...) pr_debug(__VA_ARGS__); + +#define DTRACE() DPRINT("%s: enter", __func__) + +/* The Goldfish sync driver is designed to provide a interface + * between the underlying host's sync device and the kernel's + * sw_sync. + * The purpose of the device/driver is to enable lightweight + * creation and signaling of timelines and fences + * in order to synchronize the guest with host-side graphics events. + * + * Each time the interrupt trips, the driver + * may perform a sw_sync operation. + */ + +/* The operations are: */ + +/* Ready signal - used to mark when irq should lower */ +#define CMD_SYNC_READY 0 + +/* Create a new timeline. writes timeline handle */ +#define CMD_CREATE_SYNC_TIMELINE 1 + +/* Create a fence object. reads timeline handle and time argument. + * Writes fence fd to the SYNC_REG_HANDLE register. */ +#define CMD_CREATE_SYNC_FENCE 2 + +/* Increments timeline. reads timeline handle and time argument */ +#define CMD_SYNC_TIMELINE_INC 3 + +/* Destroys a timeline. reads timeline handle */ +#define CMD_DESTROY_SYNC_TIMELINE 4 + +/* Starts a wait on the host with + * the given glsync object and sync thread handle. */ +#define CMD_TRIGGER_HOST_WAIT 5 + +/* The register layout is: */ + +#define SYNC_REG_BATCH_COMMAND 0x00 /* host->guest batch commands */ +#define SYNC_REG_BATCH_GUESTCOMMAND 0x04 /* guest->host batch commands */ +#define SYNC_REG_BATCH_COMMAND_ADDR 0x08 /* communicate physical address of host->guest batch commands */ +#define SYNC_REG_BATCH_COMMAND_ADDR_HIGH 0x0c /* 64-bit part */ +#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR 0x10 /* communicate physical address of guest->host commands */ +#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH 0x14 /* 64-bit part */ +#define SYNC_REG_INIT 0x18 /* signals that the device has been probed */ + +/* There is an ioctl associated with goldfish sync driver. + * Make it conflict with ioctls that are not likely to be used + * in the emulator. + * + * '@' 00-0F linux/radeonfb.h conflict! + * '@' 00-0F drivers/video/aty/aty128fb.c conflict! + */ +#define GOLDFISH_SYNC_IOC_MAGIC '@' + +#define GOLDFISH_SYNC_IOC_QUEUE_WORK _IOWR(GOLDFISH_SYNC_IOC_MAGIC, 0, struct goldfish_sync_ioctl_info) + +/* The above definitions (command codes, register layout, ioctl definitions) + * need to be in sync with the following files: + * + * Host-side (emulator): + * external/qemu/android/emulation/goldfish_sync.h + * external/qemu-android/hw/misc/goldfish_sync.c + * + * Guest-side (system image): + * device/generic/goldfish-opengl/system/egl/goldfish_sync.h + * device/generic/goldfish/ueventd.ranchu.rc + * platform/build/target/board/generic/sepolicy/file_contexts + */ +struct goldfish_sync_hostcmd { + /* sorted for alignment */ + uint64_t handle; + uint64_t hostcmd_handle; + uint32_t cmd; + uint32_t time_arg; +}; + +struct goldfish_sync_guestcmd { + uint64_t host_command; /* uint64_t for alignment */ + uint64_t glsync_handle; + uint64_t thread_handle; + uint64_t guest_timeline_handle; +}; + +#define GOLDFISH_SYNC_MAX_CMDS 64 + +struct goldfish_sync_state { + char __iomem *reg_base; + int irq; + + /* Spinlock protects |to_do| / |to_do_end|. */ + spinlock_t lock; + /* |mutex_lock| protects all concurrent access + * to timelines for both kernel and user space. */ + struct mutex mutex_lock; + + /* Buffer holding commands issued from host. */ + struct goldfish_sync_hostcmd to_do[GOLDFISH_SYNC_MAX_CMDS]; + uint32_t to_do_end; + + /* Addresses for the reading or writing + * of individual commands. The host can directly write + * to |batch_hostcmd| (and then this driver immediately + * copies contents to |to_do|). This driver either replies + * through |batch_hostcmd| or simply issues a + * guest->host command through |batch_guestcmd|. + */ + struct goldfish_sync_hostcmd *batch_hostcmd; + struct goldfish_sync_guestcmd *batch_guestcmd; + + /* Used to give this struct itself to a work queue + * function for executing actual sync commands. */ + struct work_struct work_item; +}; + +static struct goldfish_sync_state global_sync_state[1]; + +struct goldfish_sync_timeline_obj { + struct sw_sync_timeline *sw_sync_tl; + uint32_t current_time; + /* We need to be careful about when we deallocate + * this |goldfish_sync_timeline_obj| struct. + * In order to ensure proper cleanup, we need to + * consider the triggered host-side wait that may + * still be in flight when the guest close()'s a + * goldfish_sync device's sync context fd (and + * destroys the |sw_sync_tl| field above). + * The host-side wait may raise IRQ + * and tell the kernel to increment the timeline _after_ + * the |sw_sync_tl| has already been set to null. + * + * From observations on OpenGL apps and CTS tests, this + * happens at some very low probability upon context + * destruction or process close, but it does happen + * and it needs to be handled properly. Otherwise, + * if we clean up the surrounding |goldfish_sync_timeline_obj| + * too early, any |handle| field of any host->guest command + * might not even point to a null |sw_sync_tl| field, + * but to garbage memory or even a reclaimed |sw_sync_tl|. + * If we do not count such "pending waits" and kfree the object + * immediately upon |goldfish_sync_timeline_destroy|, + * we might get mysterous RCU stalls after running a long + * time because the garbage memory that is being read + * happens to be interpretable as a |spinlock_t| struct + * that is currently in the locked state. + * + * To track when to free the |goldfish_sync_timeline_obj| + * itself, we maintain a kref. + * The kref essentially counts the timeline itself plus + * the number of waits in flight. kref_init/kref_put + * are issued on + * |goldfish_sync_timeline_create|/|goldfish_sync_timeline_destroy| + * and kref_get/kref_put are issued on + * |goldfish_sync_fence_create|/|goldfish_sync_timeline_inc|. + * + * The timeline is destroyed after reference count + * reaches zero, which would happen after + * |goldfish_sync_timeline_destroy| and all pending + * |goldfish_sync_timeline_inc|'s are fulfilled. + * + * NOTE (1): We assume that |fence_create| and + * |timeline_inc| calls are 1:1, otherwise the kref scheme + * will not work. This is a valid assumption as long + * as the host-side virtual device implementation + * does not insert any timeline increments + * that we did not trigger from here. + * + * NOTE (2): The use of kref by itself requires no locks, + * but this does not mean everything works without locks. + * Related timeline operations do require a lock of some sort, + * or at least are not proven to work without it. + * In particualr, we assume that all the operations + * done on the |kref| field above are done in contexts where + * |global_sync_state->mutex_lock| is held. Do not + * remove that lock until everything is proven to work + * without it!!! */ + struct kref kref; +}; + +/* We will call |delete_timeline_obj| when the last reference count + * of the kref is decremented. This deletes the sw_sync + * timeline object along with the wrapper itself. */ +static void delete_timeline_obj(struct kref* kref) { + struct goldfish_sync_timeline_obj* obj = + container_of(kref, struct goldfish_sync_timeline_obj, kref); + + sync_timeline_destroy(&obj->sw_sync_tl->obj); + obj->sw_sync_tl = NULL; + kfree(obj); +} + +static uint64_t gensym_ctr; +static void gensym(char *dst) +{ + sprintf(dst, "goldfish_sync:gensym:%llu", gensym_ctr); + gensym_ctr++; +} + +/* |goldfish_sync_timeline_create| assumes that |global_sync_state->mutex_lock| + * is held. */ +static struct goldfish_sync_timeline_obj* +goldfish_sync_timeline_create(void) +{ + + char timeline_name[256]; + struct sw_sync_timeline *res_sync_tl = NULL; + struct goldfish_sync_timeline_obj *res; + + DTRACE(); + + gensym(timeline_name); + + res_sync_tl = sw_sync_timeline_create(timeline_name); + if (!res_sync_tl) { + ERR("Failed to create sw_sync timeline."); + return NULL; + } + + res = kzalloc(sizeof(struct goldfish_sync_timeline_obj), GFP_KERNEL); + res->sw_sync_tl = res_sync_tl; + res->current_time = 0; + kref_init(&res->kref); + + DPRINT("new timeline_obj=0x%p", res); + return res; +} + +/* |goldfish_sync_fence_create| assumes that |global_sync_state->mutex_lock| + * is held. */ +static int +goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj, + uint32_t val) +{ + + int fd; + char fence_name[256]; + struct sync_pt *syncpt = NULL; + struct sync_fence *sync_obj = NULL; + struct sw_sync_timeline *tl; + + DTRACE(); + + if (!obj) return -1; + + tl = obj->sw_sync_tl; + + syncpt = sw_sync_pt_create(tl, val); + if (!syncpt) { + ERR("could not create sync point! " + "sync_timeline=0x%p val=%d", + tl, val); + return -1; + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + ERR("could not get unused fd for sync fence. " + "errno=%d", fd); + goto err_cleanup_pt; + } + + gensym(fence_name); + + sync_obj = sync_fence_create(fence_name, syncpt); + if (!sync_obj) { + ERR("could not create sync fence! " + "sync_timeline=0x%p val=%d sync_pt=0x%p", + tl, val, syncpt); + goto err_cleanup_fd_pt; + } + + DPRINT("installing sync fence into fd %d sync_obj=0x%p", fd, sync_obj); + sync_fence_install(sync_obj, fd); + kref_get(&obj->kref); + + return fd; + +err_cleanup_fd_pt: + put_unused_fd(fd); +err_cleanup_pt: + sync_pt_free(syncpt); + return -1; +} + +/* |goldfish_sync_timeline_inc| assumes that |global_sync_state->mutex_lock| + * is held. */ +static void +goldfish_sync_timeline_inc(struct goldfish_sync_timeline_obj *obj, uint32_t inc) +{ + DTRACE(); + /* Just give up if someone else nuked the timeline. + * Whoever it was won't care that it doesn't get signaled. */ + if (!obj) return; + + DPRINT("timeline_obj=0x%p", obj); + sw_sync_timeline_inc(obj->sw_sync_tl, inc); + DPRINT("incremented timeline. increment max_time"); + obj->current_time += inc; + + /* Here, we will end up deleting the timeline object if it + * turns out that this call was a pending increment after + * |goldfish_sync_timeline_destroy| was called. */ + kref_put(&obj->kref, delete_timeline_obj); + DPRINT("done"); +} + +/* |goldfish_sync_timeline_destroy| assumes + * that |global_sync_state->mutex_lock| is held. */ +static void +goldfish_sync_timeline_destroy(struct goldfish_sync_timeline_obj *obj) +{ + DTRACE(); + /* See description of |goldfish_sync_timeline_obj| for why we + * should not immediately destroy |obj| */ + kref_put(&obj->kref, delete_timeline_obj); +} + +static inline void +goldfish_sync_cmd_queue(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t handle, + uint32_t time_arg, + uint64_t hostcmd_handle) +{ + struct goldfish_sync_hostcmd *to_add; + + DTRACE(); + + BUG_ON(sync_state->to_do_end == GOLDFISH_SYNC_MAX_CMDS); + + to_add = &sync_state->to_do[sync_state->to_do_end]; + + to_add->cmd = cmd; + to_add->handle = handle; + to_add->time_arg = time_arg; + to_add->hostcmd_handle = hostcmd_handle; + + sync_state->to_do_end += 1; +} + +static inline void +goldfish_sync_hostcmd_reply(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t handle, + uint32_t time_arg, + uint64_t hostcmd_handle) +{ + unsigned long irq_flags; + struct goldfish_sync_hostcmd *batch_hostcmd = + sync_state->batch_hostcmd; + + DTRACE(); + + spin_lock_irqsave(&sync_state->lock, irq_flags); + + batch_hostcmd->cmd = cmd; + batch_hostcmd->handle = handle; + batch_hostcmd->time_arg = time_arg; + batch_hostcmd->hostcmd_handle = hostcmd_handle; + writel(0, sync_state->reg_base + SYNC_REG_BATCH_COMMAND); + + spin_unlock_irqrestore(&sync_state->lock, irq_flags); +} + +static inline void +goldfish_sync_send_guestcmd(struct goldfish_sync_state *sync_state, + uint32_t cmd, + uint64_t glsync_handle, + uint64_t thread_handle, + uint64_t timeline_handle) +{ + unsigned long irq_flags; + struct goldfish_sync_guestcmd *batch_guestcmd = + sync_state->batch_guestcmd; + + DTRACE(); + + spin_lock_irqsave(&sync_state->lock, irq_flags); + + batch_guestcmd->host_command = (uint64_t)cmd; + batch_guestcmd->glsync_handle = (uint64_t)glsync_handle; + batch_guestcmd->thread_handle = (uint64_t)thread_handle; + batch_guestcmd->guest_timeline_handle = (uint64_t)timeline_handle; + writel(0, sync_state->reg_base + SYNC_REG_BATCH_GUESTCOMMAND); + + spin_unlock_irqrestore(&sync_state->lock, irq_flags); +} + +/* |goldfish_sync_interrupt| handles IRQ raises from the virtual device. + * In the context of OpenGL, this interrupt will fire whenever we need + * to signal a fence fd in the guest, with the command + * |CMD_SYNC_TIMELINE_INC|. + * However, because this function will be called in an interrupt context, + * it is necessary to do the actual work of signaling off of interrupt context. + * The shared work queue is used for this purpose. At the end when + * all pending commands are intercepted by the interrupt handler, + * we call |schedule_work|, which will later run the actual + * desired sync command in |goldfish_sync_work_item_fn|. + */ +static irqreturn_t goldfish_sync_interrupt(int irq, void *dev_id) +{ + + struct goldfish_sync_state *sync_state = dev_id; + + uint32_t nextcmd; + uint32_t command_r; + uint64_t handle_rw; + uint32_t time_r; + uint64_t hostcmd_handle_rw; + + int count = 0; + + DTRACE(); + + sync_state = dev_id; + + spin_lock(&sync_state->lock); + + for (;;) { + + readl(sync_state->reg_base + SYNC_REG_BATCH_COMMAND); + nextcmd = sync_state->batch_hostcmd->cmd; + + if (nextcmd == 0) + break; + + command_r = nextcmd; + handle_rw = sync_state->batch_hostcmd->handle; + time_r = sync_state->batch_hostcmd->time_arg; + hostcmd_handle_rw = sync_state->batch_hostcmd->hostcmd_handle; + + goldfish_sync_cmd_queue( + sync_state, + command_r, + handle_rw, + time_r, + hostcmd_handle_rw); + + count++; + } + + spin_unlock(&sync_state->lock); + + schedule_work(&sync_state->work_item); + + return (count == 0) ? IRQ_NONE : IRQ_HANDLED; +} + +/* |goldfish_sync_work_item_fn| does the actual work of servicing + * host->guest sync commands. This function is triggered whenever + * the IRQ for the goldfish sync device is raised. Once it starts + * running, it grabs the contents of the buffer containing the + * commands it needs to execute (there may be multiple, because + * our IRQ is active high and not edge triggered), and then + * runs all of them one after the other. + */ +static void goldfish_sync_work_item_fn(struct work_struct *input) +{ + + struct goldfish_sync_state *sync_state; + int sync_fence_fd; + + struct goldfish_sync_timeline_obj *timeline; + uint64_t timeline_ptr; + + uint64_t hostcmd_handle; + + uint32_t cmd; + uint64_t handle; + uint32_t time_arg; + + struct goldfish_sync_hostcmd *todo; + uint32_t todo_end; + + unsigned long irq_flags; + + struct goldfish_sync_hostcmd to_run[GOLDFISH_SYNC_MAX_CMDS]; + uint32_t i = 0; + + sync_state = container_of(input, struct goldfish_sync_state, work_item); + + mutex_lock(&sync_state->mutex_lock); + + spin_lock_irqsave(&sync_state->lock, irq_flags); { + + todo_end = sync_state->to_do_end; + + DPRINT("num sync todos: %u", sync_state->to_do_end); + + for (i = 0; i < todo_end; i++) + to_run[i] = sync_state->to_do[i]; + + /* We expect that commands will come in at a slow enough rate + * so that incoming items will not be more than + * GOLDFISH_SYNC_MAX_CMDS. + * + * This is because the way the sync device is used, + * it's only for managing buffer data transfers per frame, + * with a sequential dependency between putting things in + * to_do and taking them out. Once a set of commands is + * queued up in to_do, the user of the device waits for + * them to be processed before queuing additional commands, + * which limits the rate at which commands come in + * to the rate at which we take them out here. + * + * We also don't expect more than MAX_CMDS to be issued + * at once; there is a correspondence between + * which buffers need swapping to the (display / buffer queue) + * to particular commands, and we don't expect there to be + * enough display or buffer queues in operation at once + * to overrun GOLDFISH_SYNC_MAX_CMDS. + */ + sync_state->to_do_end = 0; + + } spin_unlock_irqrestore(&sync_state->lock, irq_flags); + + for (i = 0; i < todo_end; i++) { + DPRINT("todo index: %u", i); + + todo = &to_run[i]; + + cmd = todo->cmd; + + handle = (uint64_t)todo->handle; + time_arg = todo->time_arg; + hostcmd_handle = (uint64_t)todo->hostcmd_handle; + + DTRACE(); + + timeline = (struct goldfish_sync_timeline_obj *)(uintptr_t)handle; + + switch (cmd) { + case CMD_SYNC_READY: + break; + case CMD_CREATE_SYNC_TIMELINE: + DPRINT("exec CMD_CREATE_SYNC_TIMELINE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + timeline = goldfish_sync_timeline_create(); + timeline_ptr = (uintptr_t)timeline; + goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_TIMELINE, + timeline_ptr, + 0, + hostcmd_handle); + DPRINT("sync timeline created: %p", timeline); + break; + case CMD_CREATE_SYNC_FENCE: + DPRINT("exec CMD_CREATE_SYNC_FENCE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + sync_fence_fd = goldfish_sync_fence_create(timeline, time_arg); + goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_FENCE, + sync_fence_fd, + 0, + hostcmd_handle); + break; + case CMD_SYNC_TIMELINE_INC: + DPRINT("exec CMD_SYNC_TIMELINE_INC: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + goldfish_sync_timeline_inc(timeline, time_arg); + break; + case CMD_DESTROY_SYNC_TIMELINE: + DPRINT("exec CMD_DESTROY_SYNC_TIMELINE: " + "handle=0x%llx time_arg=%d", + handle, time_arg); + goldfish_sync_timeline_destroy(timeline); + break; + } + DPRINT("Done executing sync command"); + } + mutex_unlock(&sync_state->mutex_lock); +} + +/* Guest-side interface: file operations */ + +/* Goldfish sync context and ioctl info. + * + * When a sync context is created by open()-ing the goldfish sync device, we + * create a sync context (|goldfish_sync_context|). + * + * Currently, the only data required to track is the sync timeline itself + * along with the current time, which are all packed up in the + * |goldfish_sync_timeline_obj| field. We use a |goldfish_sync_context| + * as the filp->private_data. + * + * Next, when a sync context user requests that work be queued and a fence + * fd provided, we use the |goldfish_sync_ioctl_info| struct, which holds + * information about which host handles to touch for this particular + * queue-work operation. We need to know about the host-side sync thread + * and the particular host-side GLsync object. We also possibly write out + * a file descriptor. + */ +struct goldfish_sync_context { + struct goldfish_sync_timeline_obj *timeline; +}; + +struct goldfish_sync_ioctl_info { + uint64_t host_glsync_handle_in; + uint64_t host_syncthread_handle_in; + int fence_fd_out; +}; + +static int goldfish_sync_open(struct inode *inode, struct file *file) +{ + + struct goldfish_sync_context *sync_context; + + DTRACE(); + + mutex_lock(&global_sync_state->mutex_lock); + + sync_context = kzalloc(sizeof(struct goldfish_sync_context), GFP_KERNEL); + + if (sync_context == NULL) { + ERR("Creation of goldfish sync context failed!"); + mutex_unlock(&global_sync_state->mutex_lock); + return -ENOMEM; + } + + sync_context->timeline = NULL; + + file->private_data = sync_context; + + DPRINT("successfully create a sync context @0x%p", sync_context); + + mutex_unlock(&global_sync_state->mutex_lock); + + return 0; +} + +static int goldfish_sync_release(struct inode *inode, struct file *file) +{ + + struct goldfish_sync_context *sync_context; + + DTRACE(); + + mutex_lock(&global_sync_state->mutex_lock); + + sync_context = file->private_data; + + if (sync_context->timeline) + goldfish_sync_timeline_destroy(sync_context->timeline); + + sync_context->timeline = NULL; + + kfree(sync_context); + + mutex_unlock(&global_sync_state->mutex_lock); + + return 0; +} + +/* |goldfish_sync_ioctl| is the guest-facing interface of goldfish sync + * and is used in conjunction with eglCreateSyncKHR to queue up the + * actual work of waiting for the EGL sync command to complete, + * possibly returning a fence fd to the guest. + */ +static long goldfish_sync_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct goldfish_sync_context *sync_context_data; + struct goldfish_sync_timeline_obj *timeline; + int fd_out; + struct goldfish_sync_ioctl_info ioctl_data; + + DTRACE(); + + sync_context_data = file->private_data; + fd_out = -1; + + switch (cmd) { + case GOLDFISH_SYNC_IOC_QUEUE_WORK: + + DPRINT("exec GOLDFISH_SYNC_IOC_QUEUE_WORK"); + + mutex_lock(&global_sync_state->mutex_lock); + + if (copy_from_user(&ioctl_data, + (void __user *)arg, + sizeof(ioctl_data))) { + ERR("Failed to copy memory for ioctl_data from user."); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + if (ioctl_data.host_syncthread_handle_in == 0) { + DPRINT("Error: zero host syncthread handle!!!"); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + if (!sync_context_data->timeline) { + DPRINT("no timeline yet, create one."); + sync_context_data->timeline = goldfish_sync_timeline_create(); + DPRINT("timeline: 0x%p", &sync_context_data->timeline); + } + + timeline = sync_context_data->timeline; + fd_out = goldfish_sync_fence_create(timeline, + timeline->current_time + 1); + DPRINT("Created fence with fd %d and current time %u (timeline: 0x%p)", + fd_out, + sync_context_data->timeline->current_time + 1, + sync_context_data->timeline); + + ioctl_data.fence_fd_out = fd_out; + + if (copy_to_user((void __user *)arg, + &ioctl_data, + sizeof(ioctl_data))) { + DPRINT("Error, could not copy to user!!!"); + + sys_close(fd_out); + /* We won't be doing an increment, kref_put immediately. */ + kref_put(&timeline->kref, delete_timeline_obj); + mutex_unlock(&global_sync_state->mutex_lock); + return -EFAULT; + } + + /* We are now about to trigger a host-side wait; + * accumulate on |pending_waits|. */ + goldfish_sync_send_guestcmd(global_sync_state, + CMD_TRIGGER_HOST_WAIT, + ioctl_data.host_glsync_handle_in, + ioctl_data.host_syncthread_handle_in, + (uint64_t)(uintptr_t)(sync_context_data->timeline)); + + mutex_unlock(&global_sync_state->mutex_lock); + return 0; + default: + return -ENOTTY; + } +} + +static const struct file_operations goldfish_sync_fops = { + .owner = THIS_MODULE, + .open = goldfish_sync_open, + .release = goldfish_sync_release, + .unlocked_ioctl = goldfish_sync_ioctl, + .compat_ioctl = goldfish_sync_ioctl, +}; + +static struct miscdevice goldfish_sync_device = { + .name = "goldfish_sync", + .fops = &goldfish_sync_fops, +}; + + +static bool setup_verify_batch_cmd_addr(struct goldfish_sync_state *sync_state, + void *batch_addr, + uint32_t addr_offset, + uint32_t addr_offset_high) +{ + uint64_t batch_addr_phys; + uint32_t batch_addr_phys_test_lo; + uint32_t batch_addr_phys_test_hi; + + if (!batch_addr) { + ERR("Could not use batch command address!"); + return false; + } + + batch_addr_phys = virt_to_phys(batch_addr); + writel((uint32_t)(batch_addr_phys), + sync_state->reg_base + addr_offset); + writel((uint32_t)(batch_addr_phys >> 32), + sync_state->reg_base + addr_offset_high); + + batch_addr_phys_test_lo = + readl(sync_state->reg_base + addr_offset); + batch_addr_phys_test_hi = + readl(sync_state->reg_base + addr_offset_high); + + if (virt_to_phys(batch_addr) != + (((uint64_t)batch_addr_phys_test_hi << 32) | + batch_addr_phys_test_lo)) { + ERR("Invalid batch command address!"); + return false; + } + + return true; +} + +int goldfish_sync_probe(struct platform_device *pdev) +{ + struct resource *ioresource; + struct goldfish_sync_state *sync_state = global_sync_state; + int status; + + DTRACE(); + + sync_state->to_do_end = 0; + + spin_lock_init(&sync_state->lock); + mutex_init(&sync_state->mutex_lock); + + platform_set_drvdata(pdev, sync_state); + + ioresource = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (ioresource == NULL) { + ERR("platform_get_resource failed"); + return -ENODEV; + } + + sync_state->reg_base = devm_ioremap(&pdev->dev, ioresource->start, PAGE_SIZE); + if (sync_state->reg_base == NULL) { + ERR("Could not ioremap"); + return -ENOMEM; + } + + sync_state->irq = platform_get_irq(pdev, 0); + if (sync_state->irq < 0) { + ERR("Could not platform_get_irq"); + return -ENODEV; + } + + status = devm_request_irq(&pdev->dev, + sync_state->irq, + goldfish_sync_interrupt, + IRQF_SHARED, + pdev->name, + sync_state); + if (status) { + ERR("request_irq failed"); + return -ENODEV; + } + + INIT_WORK(&sync_state->work_item, + goldfish_sync_work_item_fn); + + misc_register(&goldfish_sync_device); + + /* Obtain addresses for batch send/recv of commands. */ + { + struct goldfish_sync_hostcmd *batch_addr_hostcmd; + struct goldfish_sync_guestcmd *batch_addr_guestcmd; + + batch_addr_hostcmd = devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_hostcmd), + GFP_KERNEL); + batch_addr_guestcmd = devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_guestcmd), + GFP_KERNEL); + + if (!setup_verify_batch_cmd_addr(sync_state, + batch_addr_hostcmd, + SYNC_REG_BATCH_COMMAND_ADDR, + SYNC_REG_BATCH_COMMAND_ADDR_HIGH)) { + ERR("goldfish_sync: Could not setup batch command address"); + return -ENODEV; + } + + if (!setup_verify_batch_cmd_addr(sync_state, + batch_addr_guestcmd, + SYNC_REG_BATCH_GUESTCOMMAND_ADDR, + SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH)) { + ERR("goldfish_sync: Could not setup batch guest command address"); + return -ENODEV; + } + + sync_state->batch_hostcmd = batch_addr_hostcmd; + sync_state->batch_guestcmd = batch_addr_guestcmd; + } + + INFO("goldfish_sync: Initialized goldfish sync device"); + + writel(0, sync_state->reg_base + SYNC_REG_INIT); + + return 0; +} + +static int goldfish_sync_remove(struct platform_device *pdev) +{ + struct goldfish_sync_state *sync_state = global_sync_state; + + DTRACE(); + + misc_deregister(&goldfish_sync_device); + memset(sync_state, 0, sizeof(struct goldfish_sync_state)); + return 0; +} + +static const struct of_device_id goldfish_sync_of_match[] = { + { .compatible = "google,goldfish-sync", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_sync_of_match); + +static const struct acpi_device_id goldfish_sync_acpi_match[] = { + { "GFSH0006", 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(acpi, goldfish_sync_acpi_match); + +static struct platform_driver goldfish_sync = { + .probe = goldfish_sync_probe, + .remove = goldfish_sync_remove, + .driver = { + .name = "goldfish_sync", + .of_match_table = goldfish_sync_of_match, + .acpi_match_table = ACPI_PTR(goldfish_sync_acpi_match), + } +}; + +module_platform_driver(goldfish_sync); + +MODULE_AUTHOR("Google, Inc."); +MODULE_DESCRIPTION("Android QEMU Sync Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0"); + +/* This function is only to run a basic test of sync framework. + * It creates a timeline and fence object whose signal point is at 1. + * The timeline is incremented, and we use the sync framework's + * sync_fence_wait on that fence object. If everything works out, + * we should not hang in the wait and return immediately. + * There is no way to explicitly run this test yet, but it + * can be used by inserting it at the end of goldfish_sync_probe. + */ +void test_kernel_sync(void) +{ + struct goldfish_sync_timeline_obj *test_timeline; + int test_fence_fd; + + DTRACE(); + + DPRINT("test sw_sync"); + + test_timeline = goldfish_sync_timeline_create(); + DPRINT("sw_sync_timeline_create -> 0x%p", test_timeline); + + test_fence_fd = goldfish_sync_fence_create(test_timeline, 1); + DPRINT("sync_fence_create -> %d", test_fence_fd); + + DPRINT("incrementing test timeline"); + goldfish_sync_timeline_inc(test_timeline, 1); + + DPRINT("test waiting (should NOT hang)"); + sync_fence_wait( + sync_fence_fdget(test_fence_fd), -1); + + DPRINT("test waiting (afterward)"); +} -- GitLab From 5acb60f60a2c4374cf04b8c26c331ceb9877fa35 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 23 Jul 2015 10:40:57 -0700 Subject: [PATCH 1066/1442] ANDROID: arch: x86: disable pic for Android toolchain Android toolchains enable PIC, so explicitly disable it with -fno-pic (this is the upstream gcc default) Signed-off-by: Greg Hackmann (cherry picked from commit 892606ece2bebfa5a1ed62e9552cc973707ae9d3) Change-Id: I1e600363e5d18e459479fe4eb23d76855e16868d --- arch/x86/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d449337a360..75725dc4df7a 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -97,6 +97,8 @@ else KBUILD_CFLAGS += $(call cc-option,-mno-80387) KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387) + KBUILD_CFLAGS += -fno-pic + # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) -- GitLab From b821439faf7527a11cae356ed79c5dbf58286a8c Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 17 Nov 2016 17:01:43 -0800 Subject: [PATCH 1067/1442] ANDROID: arm64: rename ranchu defconfig to ranchu64 Change-Id: Ib7cd1ef722167905957623f65c3cc064e9d5c357 --- arch/arm64/configs/{ranchu_defconfig => ranchu64_defconfig} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename arch/arm64/configs/{ranchu_defconfig => ranchu64_defconfig} (100%) diff --git a/arch/arm64/configs/ranchu_defconfig b/arch/arm64/configs/ranchu64_defconfig similarity index 100% rename from arch/arm64/configs/ranchu_defconfig rename to arch/arm64/configs/ranchu64_defconfig -- GitLab From e4bc8c96ba034d2a134680955df8c2a24cbf91c1 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Fri, 18 Nov 2016 13:16:07 +0800 Subject: [PATCH 1068/1442] ANDROID: video: goldfishfb: fix platform_no_drv_owner.cocci warnings drivers/video/fbdev/goldfishfb.c:318:3-8: No need to set .owner here. The core will do it. Remove .owner field if calls are used which set it automatically Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Change-Id: Iffb5a344d19088cc5e1fe7e5fcc3a907fe936802 CC: Greg Hackmann Signed-off-by: Fengguang Wu Signed-off-by: Guenter Roeck --- drivers/video/fbdev/goldfishfb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c index 131fee0341c3..1e56b50e4082 100644 --- a/drivers/video/fbdev/goldfishfb.c +++ b/drivers/video/fbdev/goldfishfb.c @@ -322,7 +322,6 @@ static struct platform_driver goldfish_fb_driver = { .remove = goldfish_fb_remove, .driver = { .name = "goldfish_fb", - .owner = THIS_MODULE, .of_match_table = goldfish_fb_of_match, .acpi_match_table = ACPI_PTR(goldfish_fb_acpi_match), } -- GitLab From 72408a6328b1cb41e246dedcce3ed5cc2cbb4965 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 28 Nov 2016 14:35:22 -0800 Subject: [PATCH 1069/1442] UPSTREAM: timekeeping: Add a fast and NMI safe boot clock This boot clock can be used as a tracing clock and will account for suspend time. To keep it NMI safe since we're accessing from tracing, we're not using a separate timekeeper with updates to monotonic clock and boot offset protected with seqlocks. This has the following minor side effects: (1) Its possible that a timestamp be taken after the boot offset is updated but before the timekeeper is updated. If this happens, the new boot offset is added to the old timekeeping making the clock appear to update slightly earlier: CPU 0 CPU 1 timekeeping_inject_sleeptime64() __timekeeping_inject_sleeptime(tk, delta); timestamp(); timekeeping_update(tk, TK_CLEAR_NTP...); (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be partially updated. Since the tk->offs_boot update is a rare event, this should be a rare occurrence which postprocessing should be able to handle. Bug: b/33184060 Change-Id: If79be2ed9d7a25ac39805b1fd81743026fc96575 Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Richard Cochran Cc: Prarit Bhargava Reviewed-by: Thomas Gleixner Signed-off-by: Joel Fernandes Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 + kernel/time/timekeeping.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 09168c52ab64..361f8bf1429d 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -249,6 +249,7 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); +extern u64 ktime_get_boot_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 37dec7e3db43..b2286e94c934 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -425,6 +425,35 @@ u64 ktime_get_raw_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); +/** + * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. + * + * To keep it NMI safe since we're accessing from tracing, we're not using a + * separate timekeeper with updates to monotonic clock and boot offset + * protected with seqlocks. This has the following minor side effects: + * + * (1) Its possible that a timestamp be taken after the boot offset is updated + * but before the timekeeper is updated. If this happens, the new boot offset + * is added to the old timekeeping making the clock appear to update slightly + * earlier: + * CPU 0 CPU 1 + * timekeeping_inject_sleeptime64() + * __timekeeping_inject_sleeptime(tk, delta); + * timestamp(); + * timekeeping_update(tk, TK_CLEAR_NTP...); + * + * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be + * partially updated. Since the tk->offs_boot update is a rare event, this + * should be a rare occurrence which postprocessing should be able to handle. + */ +u64 notrace ktime_get_boot_fast_ns(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); +} +EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); + /* Suspend-time cycles value for halted fast timekeeper. */ static cycle_t cycles_at_suspend; -- GitLab From 59cbbe30d934386155e3604071b4f25575ff5f4b Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 28 Nov 2016 14:35:23 -0800 Subject: [PATCH 1070/1442] UPSTREAM: trace: Add an option for boot clock as trace clock Unlike monotonic clock, boot clock as a trace clock will account for time spent in suspend useful for tracing suspend/resume. This uses earlier introduced infrastructure for using the fast boot clock. Bug: b/33184060 Change-Id: I144f07560abc35b92ee9a54de39f33574c218302 Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Richard Cochran Cc: Prarit Bhargava Reviewed-by: Thomas Gleixner Signed-off-by: Joel Fernandes Signed-off-by: John Stultz Acked-by: Steven Rostedt --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ca5cf512e57a..2c99e77209ee 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1125,6 +1125,7 @@ static struct { { trace_clock, "perf", 1 }, { ktime_get_mono_fast_ns, "mono", 1 }, { ktime_get_raw_fast_ns, "mono_raw", 1 }, + { ktime_get_boot_fast_ns, "boot", 1 }, ARCH_TRACE_CLOCKS }; -- GitLab From 2b0491efda89ac073eef7bfa42ce38d58846e148 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 28 Nov 2016 14:35:24 -0800 Subject: [PATCH 1071/1442] UPSTREAM: trace: Update documentation for mono, mono_raw and boot clock Documentation was missing for mono and mono_raw, add them and also for the boot clock introduced in this series. Change-Id: Ib358ca287eff31977143166e234335c7b53e1812 Signed-off-by: Joel Fernandes Signed-off-by: John Stultz Reviewed-by: Thomas Gleixner Acked-by: Steven Rostedt Cc: Prarit Bhargava Cc: Richard Cochran Link: http://lkml.kernel.org/r/1480372524-15181-8-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- Documentation/trace/ftrace.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index e20aacb9a6e8..91723ed53470 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -362,6 +362,26 @@ of ftrace. Here is a list of some of the key files: to correlate events across hypervisor/guest if tb_offset is known. + mono: This uses the fast monotonic clock (CLOCK_MONOTONIC) + which is monotonic and is subject to NTP rate adjustments. + + mono_raw: + This is the raw monotonic clock (CLOCK_MONOTONIC_RAW) + which is montonic but is not subject to any rate adjustments + and ticks at the same rate as the hardware clocksource. + + boot: This is the boot clock (CLOCK_BOOTTIME) and is based on the + fast monotonic clock, but also accounts for time spent in + suspend. Since the clock access is designed for use in + tracing in the suspend path, some side effects are possible + if clock is accessed after the suspend time is accounted before + the fast mono clock is updated. In this case, the clock update + appears to happen slightly sooner than it normally would have. + Also on 32-bit systems, it's possible that the 64-bit boot offset + sees a partial update. These effects are rare and post + processing should be able to handle them. See comments in the + ktime_get_boot_fast_ns() function for more information. + To set a clock, simply echo the clock name into this file. echo global > trace_clock -- GitLab From a716dc568561874b56c90b0a9ee8928b183319d6 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Mon, 12 Sep 2016 15:51:35 -0700 Subject: [PATCH 1072/1442] ANDROID: build: add build server configs for goldfish Change-Id: Icd7a8d44df2b09394be5c6230c64ecb374cae236 --- build.config.goldfish.arm | 12 ++++++++++++ build.config.goldfish.arm64 | 12 ++++++++++++ build.config.goldfish.mips | 11 +++++++++++ build.config.goldfish.mips64 | 11 +++++++++++ build.config.goldfish.x86 | 12 ++++++++++++ build.config.goldfish.x86_64 | 12 ++++++++++++ 6 files changed, 70 insertions(+) create mode 100644 build.config.goldfish.arm create mode 100644 build.config.goldfish.arm64 create mode 100644 build.config.goldfish.mips create mode 100644 build.config.goldfish.mips64 create mode 100644 build.config.goldfish.x86 create mode 100644 build.config.goldfish.x86_64 diff --git a/build.config.goldfish.arm b/build.config.goldfish.arm new file mode 100644 index 000000000000..bab53668e033 --- /dev/null +++ b/build.config.goldfish.arm @@ -0,0 +1,12 @@ +ARCH=arm +BRANCH=android-4.4 +CROSS_COMPILE=arm-linux-androidkernel- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=goldfish +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/arm/arm-linux-androideabi-4.9/bin +FILES=" +arch/arm/boot/zImage +vmlinux +System.map +" diff --git a/build.config.goldfish.arm64 b/build.config.goldfish.arm64 new file mode 100644 index 000000000000..0b4c40604b76 --- /dev/null +++ b/build.config.goldfish.arm64 @@ -0,0 +1,12 @@ +ARCH=arm64 +BRANCH=android-4.4 +CROSS_COMPILE=aarch64-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=goldfish +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin +FILES=" +arch/arm64/boot/Image +vmlinux +System.map +" diff --git a/build.config.goldfish.mips b/build.config.goldfish.mips new file mode 100644 index 000000000000..5dcd8a181ec0 --- /dev/null +++ b/build.config.goldfish.mips @@ -0,0 +1,11 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=goldfish +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" diff --git a/build.config.goldfish.mips64 b/build.config.goldfish.mips64 new file mode 100644 index 000000000000..9c0b6cbfdb9b --- /dev/null +++ b/build.config.goldfish.mips64 @@ -0,0 +1,11 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=goldfish +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" diff --git a/build.config.goldfish.x86 b/build.config.goldfish.x86 new file mode 100644 index 000000000000..2b8a9b75a14b --- /dev/null +++ b/build.config.goldfish.x86 @@ -0,0 +1,12 @@ +ARCH=x86 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=i386_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=goldfish +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" diff --git a/build.config.goldfish.x86_64 b/build.config.goldfish.x86_64 new file mode 100644 index 000000000000..940caefc800f --- /dev/null +++ b/build.config.goldfish.x86_64 @@ -0,0 +1,12 @@ +ARCH=x86_64 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=x86_64_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=goldfish +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" -- GitLab From eb4228c7a5edcbfe1dc50c49323af6e6861ffaf7 Mon Sep 17 00:00:00 2001 From: Keun-young Park Date: Mon, 14 Nov 2016 18:25:15 -0800 Subject: [PATCH 1073/1442] ANDROID: dm verity: add minimum prefetch size - For device like eMMC, it gives better performance to read more hash blocks at a time. - For android, set it to default 128. For other devices, set it to 1 which is the same as now. - saved boot-up time by 300ms in tested device bug: 32246564 Change-Id: Ibc0401a0cddba64b862a80445844b4e595213621 Cc: Sami Tolvanen Signed-off-by: Keun-young Park --- drivers/md/Kconfig | 16 ++++++++++++++++ drivers/md/dm-verity-target.c | 9 ++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index f550da3beabd..1e66909af4bc 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -458,6 +458,21 @@ config DM_VERITY If unsure, say N. +config DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 + bool "Prefetch size 128" + +config DM_VERITY_HASH_PREFETCH_MIN_SIZE + int "Verity hash prefetch minimum size" + depends on DM_VERITY + range 1 4096 + default 128 if DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 + default 1 + ---help--- + This sets minimum number of hash blocks to prefetch for dm-verity. + For devices like eMMC, having larger prefetch size like 128 can improve + performance with increased memory consumption for keeping more hashes + in RAM. + config DM_VERITY_FEC bool "Verity forward error correction support" depends on DM_VERITY @@ -510,6 +525,7 @@ config DM_ANDROID_VERITY depends on ASYMMETRIC_KEY_TYPE depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE depends on MD_LINEAR + select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 ---help--- This device-mapper target is virtually a VERITY target. This target is setup by reading the metadata contents piggybacked diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index b53539e26bc2..5d0a9963b108 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -501,6 +501,7 @@ static void verity_prefetch_io(struct work_struct *work) container_of(work, struct dm_verity_prefetch_work, work); struct dm_verity *v = pw->v; int i; + sector_t prefetch_size; for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; @@ -523,8 +524,14 @@ static void verity_prefetch_io(struct work_struct *work) hash_block_end = v->hash_blocks - 1; } no_prefetch_cluster: + // for emmc, it is more efficient to send bigger read + prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE, + hash_block_end - hash_block_start + 1); + if ((hash_block_start + prefetch_size) >= (v->hash_start + v->hash_blocks)) { + prefetch_size = hash_block_end - hash_block_start + 1; + } dm_bufio_prefetch(v->bufio, hash_block_start, - hash_block_end - hash_block_start + 1); + prefetch_size); } kfree(pw); -- GitLab From d7d2efab84d55a8957b1104ce610a5331e073818 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Wed, 7 Dec 2016 18:11:48 -0800 Subject: [PATCH 1074/1442] ANDROID: build: fix build config kernel_dir Change-Id: I88b87a9c85990b12dc8174349cfc14eddfb379d2 --- build.config.goldfish.arm | 2 +- build.config.goldfish.arm64 | 2 +- build.config.goldfish.mips | 2 +- build.config.goldfish.mips64 | 2 +- build.config.goldfish.x86 | 2 +- build.config.goldfish.x86_64 | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build.config.goldfish.arm b/build.config.goldfish.arm index bab53668e033..866da9361b71 100644 --- a/build.config.goldfish.arm +++ b/build.config.goldfish.arm @@ -3,7 +3,7 @@ BRANCH=android-4.4 CROSS_COMPILE=arm-linux-androidkernel- DEFCONFIG=ranchu_defconfig EXTRA_CMDS='' -KERNEL_DIR=goldfish +KERNEL_DIR=common LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/arm/arm-linux-androideabi-4.9/bin FILES=" arch/arm/boot/zImage diff --git a/build.config.goldfish.arm64 b/build.config.goldfish.arm64 index 0b4c40604b76..9c963cf4a3d8 100644 --- a/build.config.goldfish.arm64 +++ b/build.config.goldfish.arm64 @@ -3,7 +3,7 @@ BRANCH=android-4.4 CROSS_COMPILE=aarch64-linux-android- DEFCONFIG=ranchu64_defconfig EXTRA_CMDS='' -KERNEL_DIR=goldfish +KERNEL_DIR=common LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin FILES=" arch/arm64/boot/Image diff --git a/build.config.goldfish.mips b/build.config.goldfish.mips index 5dcd8a181ec0..8af53d2c2940 100644 --- a/build.config.goldfish.mips +++ b/build.config.goldfish.mips @@ -3,7 +3,7 @@ BRANCH=android-4.4 CROSS_COMPILE=mips64el-linux-android- DEFCONFIG=ranchu_defconfig EXTRA_CMDS='' -KERNEL_DIR=goldfish +KERNEL_DIR=common LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin FILES=" vmlinux diff --git a/build.config.goldfish.mips64 b/build.config.goldfish.mips64 index 9c0b6cbfdb9b..2a33d36dc4c8 100644 --- a/build.config.goldfish.mips64 +++ b/build.config.goldfish.mips64 @@ -3,7 +3,7 @@ BRANCH=android-4.4 CROSS_COMPILE=mips64el-linux-android- DEFCONFIG=ranchu64_defconfig EXTRA_CMDS='' -KERNEL_DIR=goldfish +KERNEL_DIR=common LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin FILES=" vmlinux diff --git a/build.config.goldfish.x86 b/build.config.goldfish.x86 index 2b8a9b75a14b..f86253f58d4d 100644 --- a/build.config.goldfish.x86 +++ b/build.config.goldfish.x86 @@ -3,7 +3,7 @@ BRANCH=android-4.4 CROSS_COMPILE=x86_64-linux-android- DEFCONFIG=i386_ranchu_defconfig EXTRA_CMDS='' -KERNEL_DIR=goldfish +KERNEL_DIR=common LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin FILES=" arch/x86/boot/bzImage diff --git a/build.config.goldfish.x86_64 b/build.config.goldfish.x86_64 index 940caefc800f..e1738861ec5c 100644 --- a/build.config.goldfish.x86_64 +++ b/build.config.goldfish.x86_64 @@ -3,7 +3,7 @@ BRANCH=android-4.4 CROSS_COMPILE=x86_64-linux-android- DEFCONFIG=x86_64_ranchu_defconfig EXTRA_CMDS='' -KERNEL_DIR=goldfish +KERNEL_DIR=common LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin FILES=" arch/x86/boot/bzImage -- GitLab From 1a03fc05a56989f9b00b2f6aa6011ffbbd0b738f Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 8 Dec 2016 17:06:03 -0800 Subject: [PATCH 1075/1442] ANDROID: goldfish: enable CONFIG_INET_DIAG_DESTROY Bug: 31648368 Change-Id: I3715cc6474129ba2176be62ed2c0a7d09a6f2ac7 --- arch/arm/configs/ranchu_defconfig | 1 + arch/arm64/configs/ranchu64_defconfig | 1 + arch/x86/configs/i386_ranchu_defconfig | 2 +- arch/x86/configs/x86_64_ranchu_defconfig | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig index 35a90af941a4..49e7bbd5825a 100644 --- a/arch/arm/configs/ranchu_defconfig +++ b/arch/arm/configs/ranchu_defconfig @@ -48,6 +48,7 @@ CONFIG_UNIX=y CONFIG_XFRM_USER=y CONFIG_NET_KEY=y CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y diff --git a/arch/arm64/configs/ranchu64_defconfig b/arch/arm64/configs/ranchu64_defconfig index 00eb346e0928..fc55008d8c4c 100644 --- a/arch/arm64/configs/ranchu64_defconfig +++ b/arch/arm64/configs/ranchu64_defconfig @@ -50,6 +50,7 @@ CONFIG_UNIX=y CONFIG_XFRM_USER=y CONFIG_NET_KEY=y CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig index 0206eb8cfb61..65ed8c8f8444 100644 --- a/arch/x86/configs/i386_ranchu_defconfig +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -89,7 +89,7 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set +CONFIG_INET_DIAG_DESTROY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig index dd389774bacb..d977bd91e390 100644 --- a/arch/x86/configs/x86_64_ranchu_defconfig +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -87,7 +87,7 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set +CONFIG_INET_DIAG_DESTROY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y -- GitLab From 2bed6160b36700a2cc84105df93ec96dff74d7d5 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Wed, 14 Dec 2016 10:21:07 -0800 Subject: [PATCH 1076/1442] ANDROID: goldfish: disable GOLDFISH_SYNC 4.9 kernel removed sw_sync. Need to rework goldfish sync driver. Disable it for now to fix build. Change-Id: Ie4e5e6b0ef318523752dbf27f1b7f0995109f599 --- arch/x86/configs/i386_ranchu_defconfig | 1 - arch/x86/configs/x86_64_ranchu_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig index 65ed8c8f8444..11181a63a311 100644 --- a/arch/x86/configs/i386_ranchu_defconfig +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -363,7 +363,6 @@ CONFIG_SYNC=y CONFIG_SW_SYNC=y CONFIG_ION=y CONFIG_GOLDFISH_AUDIO=y -CONFIG_GOLDFISH_SYNC=y CONFIG_SND_HDA_INTEL=y CONFIG_GOLDFISH=y CONFIG_GOLDFISH_PIPE=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig index d977bd91e390..a62b86769cde 100644 --- a/arch/x86/configs/x86_64_ranchu_defconfig +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -360,7 +360,6 @@ CONFIG_SYNC=y CONFIG_SW_SYNC=y CONFIG_ION=y CONFIG_GOLDFISH_AUDIO=y -CONFIG_GOLDFISH_SYNC=y CONFIG_SND_HDA_INTEL=y CONFIG_GOLDFISH=y CONFIG_GOLDFISH_PIPE=y -- GitLab From ba574b64bc7a17a36b2517ce7d6d0a5a77873acf Mon Sep 17 00:00:00 2001 From: Yurii Zubrytskyi Date: Wed, 4 May 2016 13:05:38 -0700 Subject: [PATCH 1077/1442] ANDROID: goldfish_pipe: bugfixes and performance improvements. Combine following patches from android-goldfish-3.18 branch: c0f015a [pipe] Fix the pipe driver for x64 platform + correct pages count 48e6bf5 [pipe] Use get_use_pages_fast() which is possibly faster fb20f13 [goldfish] More pages in goldfish pipe f180e6d goldfish_pipe: Return from read_write on signal and EIO 3dec3b7 [pipe] Fix a minor leak in setup_access_params_addr() Change-Id: I1041fd65d7faaec123e6cedd3dbbc5a2fbb86c4d --- drivers/platform/goldfish/goldfish_pipe.c | 108 ++++++++++++++-------- 1 file changed, 71 insertions(+), 37 deletions(-) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 198d16da025d..55a74e484c16 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -110,6 +110,16 @@ #define PIPE_WAKE_READ (1 << 1) /* pipe can now be read from */ #define PIPE_WAKE_WRITE (1 << 2) /* pipe can now be written to */ +#define MAX_PAGES_TO_GRAB 32 + +#define DEBUG 0 + +#if DEBUG +#define DPRINT(...) { printk(KERN_ERR __VA_ARGS__); } +#else +#define DPRINT(...) +#endif + struct access_params { unsigned long channel; u32 size; @@ -231,8 +241,10 @@ static int setup_access_params_addr(struct platform_device *pdev, if (valid_batchbuffer_addr(dev, aps)) { dev->aps = aps; return 0; - } else + } else { + devm_kfree(&pdev->dev, aps); return -1; + } } /* A value that will not be set by qemu emulator */ @@ -269,6 +281,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, struct goldfish_pipe *pipe = filp->private_data; struct goldfish_pipe_dev *dev = pipe->dev; unsigned long address, address_end; + struct page* pages[MAX_PAGES_TO_GRAB] = {}; int count = 0, ret = -EINVAL; /* If the emulator already closed the pipe, no need to go further */ @@ -293,45 +306,58 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, while (address < address_end) { unsigned long page_end = (address & PAGE_MASK) + PAGE_SIZE; - unsigned long next = page_end < address_end ? page_end - : address_end; - unsigned long avail = next - address; - int status, wakeBit; - struct page *page; - - /* Either vaddr or paddr depending on the device version */ - unsigned long xaddr; + unsigned long next, avail; + int status, wakeBit, page_i, num_contiguous_pages; + long first_page, last_page, requested_pages; + unsigned long xaddr, xaddr_prev, xaddr_i; /* - * We grab the pages on a page-by-page basis in case user - * space gives us a potentially huge buffer but the read only - * returns a small amount, then there's no need to pin that - * much memory to the process. + * Attempt to grab multiple physically contiguous pages. */ - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(address, 1, is_write ? 0 : FOLL_WRITE, - &page, NULL); - up_read(¤t->mm->mmap_sem); - if (ret < 0) - break; + first_page = address & PAGE_MASK; + last_page = (address_end - 1) & PAGE_MASK; + requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1; + if (requested_pages > MAX_PAGES_TO_GRAB) { + requested_pages = MAX_PAGES_TO_GRAB; + } + ret = get_user_pages_fast(first_page, requested_pages, + !is_write, pages); + + DPRINT("%s: requested pages: %d %d\n", __FUNCTION__, ret, requested_pages); + if (ret == 0) { + DPRINT("%s: error: (requested pages == 0) (wanted %d)\n", + __FUNCTION__, requested_pages); + return ret; + } + if (ret < 0) { + DPRINT("%s: (requested pages < 0) %d \n", + __FUNCTION__, requested_pages); + return ret; + } - if (dev->version) { - /* Device version 1 or newer (qemu-android) expects the - * physical address. - */ - xaddr = page_to_phys(page) | (address & ~PAGE_MASK); - } else { - /* Device version 0 (classic emulator) expects the - * virtual address. - */ - xaddr = address; + xaddr = page_to_phys(pages[0]) | (address & ~PAGE_MASK); + xaddr_prev = xaddr; + num_contiguous_pages = ret == 0 ? 0 : 1; + for (page_i = 1; page_i < ret; page_i++) { + xaddr_i = page_to_phys(pages[page_i]) | (address & ~PAGE_MASK); + if (xaddr_i == xaddr_prev + PAGE_SIZE) { + page_end += PAGE_SIZE; + xaddr_prev = xaddr_i; + num_contiguous_pages++; + } else { + DPRINT("%s: discontinuous page boundary: %d pages instead\n", + __FUNCTION__, page_i); + break; + } } + next = page_end < address_end ? page_end : address_end; + avail = next - address; /* Now, try to transfer the bytes in the current page */ spin_lock_irqsave(&dev->lock, irq_flags); if (access_with_param(dev, - is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER, - xaddr, avail, pipe, &status)) { + is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER, + xaddr, avail, pipe, &status)) { gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL, dev->base + PIPE_REG_CHANNEL_HIGH); writel(avail, dev->base + PIPE_REG_SIZE); @@ -344,9 +370,13 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, } spin_unlock_irqrestore(&dev->lock, irq_flags); - if (status > 0 && !is_write) - set_page_dirty(page); - put_page(page); + for (page_i = 0; page_i < ret; page_i++) { + if (status > 0 && !is_write && + page_i < num_contiguous_pages) { + set_page_dirty(pages[page_i]); + } + put_page(pages[page_i]); + } if (status > 0) { /* Correct transfer */ count += status; @@ -368,7 +398,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, */ if (status != PIPE_ERROR_AGAIN) pr_info_ratelimited("goldfish_pipe: backend returned error %d on %s\n", - status, is_write ? "write" : "read"); + status, is_write ? "write" : "read"); ret = 0; break; } @@ -378,7 +408,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, * non-blocking mode, just return the error code. */ if (status != PIPE_ERROR_AGAIN || - (filp->f_flags & O_NONBLOCK) != 0) { + (filp->f_flags & O_NONBLOCK) != 0) { ret = goldfish_pipe_error_convert(status); break; } @@ -392,7 +422,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, /* Tell the emulator we're going to wait for a wake event */ goldfish_cmd(pipe, - is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ); + is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ); /* Unlock the pipe, then wait for the wake signal */ mutex_unlock(&pipe->lock); @@ -538,6 +568,8 @@ static int goldfish_pipe_open(struct inode *inode, struct file *file) pipe->dev = dev; mutex_init(&pipe->lock); + DPRINT("%s: call. pipe_dev pipe_dev=0x%lx new_pipe_addr=0x%lx file=0x%lx\n", __FUNCTION__, pipe_dev, pipe, file); + // spin lock init, write head of list, i guess init_waitqueue_head(&pipe->wake_queue); /* @@ -560,6 +592,7 @@ static int goldfish_pipe_release(struct inode *inode, struct file *filp) { struct goldfish_pipe *pipe = filp->private_data; + DPRINT("%s: call. pipe=0x%lx file=0x%lx\n", __FUNCTION__, pipe, filp); /* The guest is closing the channel, so tell the emulator right now */ goldfish_cmd(pipe, CMD_CLOSE); kfree(pipe); @@ -584,6 +617,7 @@ static struct miscdevice goldfish_pipe_device = { static int goldfish_pipe_probe(struct platform_device *pdev) { + DPRINT("%s: call. platform_device=0x%lx\n", __FUNCTION__, pdev); int err; struct resource *r; struct goldfish_pipe_dev *dev = pipe_dev; -- GitLab From f9101247e1ea05870b489bfb7c9bcff68b9bdbc3 Mon Sep 17 00:00:00 2001 From: Yurii Zubrytskyi Date: Fri, 29 Jul 2016 10:51:46 -0700 Subject: [PATCH 1078/1442] ANDROID: goldfish_pipe: An implementation of more parallel pipe This is a driver code for a redesigned android pipe. Currently it works for x86 and x64 emulators with the following performance results: ADB push to /dev/null, Ubuntu, 400 MB file, times are for 1/10/100 parallel adb commands x86 adb push: (4.4s / 11.5s / 2m10s) -> (2.8s / 6s / 51s) x64 adb push: (7s / 15s / (too long, 6m+) -> (2.7s / 6.2s / 52s) ADB pull and push to /data/ have the same %% of speedup More importantly, I don't see any signs of slowdowns when run in parallel with Antutu benchmark, so it is definitely making much better job at multithreading. The code features dynamic host detection: old emulator gets the previous version of the pipe driver code. Combine follow patch from android-goldfish-3.10 b543285 [pipe] Increase the default pipe buffers size, make it configurable Signed-off-by: "Yurii Zubrytskyi" Change-Id: I140d506204cab6e78dd503e5a43abc8886e4ffff --- drivers/platform/goldfish/Makefile | 2 +- drivers/platform/goldfish/goldfish_pipe.c | 170 +--- drivers/platform/goldfish/goldfish_pipe.h | 92 ++ drivers/platform/goldfish/goldfish_pipe_v2.c | 888 +++++++++++++++++++ 4 files changed, 1006 insertions(+), 146 deletions(-) create mode 100644 drivers/platform/goldfish/goldfish_pipe.h create mode 100644 drivers/platform/goldfish/goldfish_pipe_v2.c diff --git a/drivers/platform/goldfish/Makefile b/drivers/platform/goldfish/Makefile index d3487125838c..e53ae2fc717b 100644 --- a/drivers/platform/goldfish/Makefile +++ b/drivers/platform/goldfish/Makefile @@ -2,4 +2,4 @@ # Makefile for Goldfish platform specific drivers # obj-$(CONFIG_GOLDFISH_BUS) += pdev_bus.o -obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe.o +obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe.o goldfish_pipe_v2.o diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 55a74e484c16..1a82e9b73eeb 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -15,52 +15,11 @@ * */ -/* This source file contains the implementation of a special device driver - * that intends to provide a *very* fast communication channel between the - * guest system and the QEMU emulator. - * - * Usage from the guest is simply the following (error handling simplified): - * - * int fd = open("/dev/qemu_pipe",O_RDWR); - * .... write() or read() through the pipe. - * - * This driver doesn't deal with the exact protocol used during the session. - * It is intended to be as simple as something like: - * - * // do this _just_ after opening the fd to connect to a specific - * // emulator service. - * const char* msg = ""; - * if (write(fd, msg, strlen(msg)+1) < 0) { - * ... could not connect to service - * close(fd); - * } - * - * // after this, simply read() and write() to communicate with the - * // service. Exact protocol details left as an exercise to the reader. - * - * This driver is very fast because it doesn't copy any data through - * intermediate buffers, since the emulator is capable of translating - * guest user addresses into host ones. - * - * Note that we must however ensure that each user page involved in the - * exchange is properly mapped during a transfer. +/* This source file contains the implementation of the legacy version of + * a goldfish pipe device driver. See goldfish_pipe_v2.c for the current + * version. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "goldfish_pipe.h" /* * IMPORTANT: The following constants must match the ones used and defined @@ -120,6 +79,14 @@ #define DPRINT(...) #endif +/* This data type models a given pipe instance */ +struct goldfish_pipe { + struct goldfish_pipe_dev *dev; + struct mutex lock; + unsigned long flags; + wait_queue_head_t wake_queue; +}; + struct access_params { unsigned long channel; u32 size; @@ -130,29 +97,6 @@ struct access_params { u32 flags; }; -/* The global driver data. Holds a reference to the i/o page used to - * communicate with the emulator, and a wake queue for blocked tasks - * waiting to be awoken. - */ -struct goldfish_pipe_dev { - spinlock_t lock; - unsigned char __iomem *base; - struct access_params *aps; - int irq; - u32 version; -}; - -static struct goldfish_pipe_dev pipe_dev[1]; - -/* This data type models a given pipe instance */ -struct goldfish_pipe { - struct goldfish_pipe_dev *dev; - struct mutex lock; - unsigned long flags; - wait_queue_head_t wake_queue; -}; - - /* Bit flags for the 'flags' field */ enum { BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */ @@ -323,7 +267,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, ret = get_user_pages_fast(first_page, requested_pages, !is_write, pages); - DPRINT("%s: requested pages: %d %d\n", __FUNCTION__, ret, requested_pages); + DPRINT("%s: requested pages: %d %d %p\n", __FUNCTION__, + ret, requested_pages, first_page); if (ret == 0) { DPRINT("%s: error: (requested pages == 0) (wanted %d)\n", __FUNCTION__, requested_pages); @@ -609,98 +554,33 @@ static const struct file_operations goldfish_pipe_fops = { .release = goldfish_pipe_release, }; -static struct miscdevice goldfish_pipe_device = { +static struct miscdevice goldfish_pipe_dev = { .minor = MISC_DYNAMIC_MINOR, .name = "goldfish_pipe", .fops = &goldfish_pipe_fops, }; -static int goldfish_pipe_probe(struct platform_device *pdev) +int goldfish_pipe_device_init_v1(struct platform_device *pdev) { - DPRINT("%s: call. platform_device=0x%lx\n", __FUNCTION__, pdev); - int err; - struct resource *r; struct goldfish_pipe_dev *dev = pipe_dev; - - /* not thread safe, but this should not happen */ - WARN_ON(dev->base != NULL); - - spin_lock_init(&dev->lock); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (r == NULL || resource_size(r) < PAGE_SIZE) { - dev_err(&pdev->dev, "can't allocate i/o page\n"); - return -EINVAL; - } - dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE); - if (dev->base == NULL) { - dev_err(&pdev->dev, "ioremap failed\n"); - return -EINVAL; - } - - r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (r == NULL) { - err = -EINVAL; - goto error; - } - dev->irq = r->start; - - err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, + int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, IRQF_SHARED, "goldfish_pipe", dev); if (err) { - dev_err(&pdev->dev, "unable to allocate IRQ\n"); - goto error; + dev_err(&pdev->dev, "unable to allocate IRQ for v1\n"); + return err; } - err = misc_register(&goldfish_pipe_device); + err = misc_register(&goldfish_pipe_dev); if (err) { - dev_err(&pdev->dev, "unable to register device\n"); - goto error; + dev_err(&pdev->dev, "unable to register v1 device\n"); + return err; } - setup_access_params_addr(pdev, dev); - /* Although the pipe device in the classic Android emulator does not - * recognize the 'version' register, it won't treat this as an error - * either and will simply return 0, which is fine. - */ - dev->version = readl(dev->base + PIPE_REG_VERSION); + setup_access_params_addr(pdev, dev); return 0; - -error: - dev->base = NULL; - return err; } -static int goldfish_pipe_remove(struct platform_device *pdev) +void goldfish_pipe_device_deinit_v1(struct platform_device *pdev) { - struct goldfish_pipe_dev *dev = pipe_dev; - misc_deregister(&goldfish_pipe_device); - dev->base = NULL; - return 0; + misc_deregister(&goldfish_pipe_dev); } - -static const struct acpi_device_id goldfish_pipe_acpi_match[] = { - { "GFSH0003", 0 }, - { }, -}; -MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match); - -static const struct of_device_id goldfish_pipe_of_match[] = { - { .compatible = "google,android-pipe", }, - {}, -}; -MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match); - -static struct platform_driver goldfish_pipe = { - .probe = goldfish_pipe_probe, - .remove = goldfish_pipe_remove, - .driver = { - .name = "goldfish_pipe", - .of_match_table = goldfish_pipe_of_match, - .acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match), - } -}; - -module_platform_driver(goldfish_pipe); -MODULE_AUTHOR("David Turner "); -MODULE_LICENSE("GPL"); diff --git a/drivers/platform/goldfish/goldfish_pipe.h b/drivers/platform/goldfish/goldfish_pipe.h new file mode 100644 index 000000000000..6cd1b63be8c9 --- /dev/null +++ b/drivers/platform/goldfish/goldfish_pipe.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef GOLDFISH_PIPE_H +#define GOLDFISH_PIPE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Initialize the legacy version of the pipe device driver */ +int goldfish_pipe_device_init_v1(struct platform_device *pdev); + +/* Deinitialize the legacy version of the pipe device driver */ +void goldfish_pipe_device_deinit_v1(struct platform_device *pdev); + +/* Forward declarations for the device struct */ +struct goldfish_pipe; +struct goldfish_pipe_device_buffers; + +/* The global driver data. Holds a reference to the i/o page used to + * communicate with the emulator, and a wake queue for blocked tasks + * waiting to be awoken. + */ +struct goldfish_pipe_dev { + /* + * Global device spinlock. Protects the following members: + * - pipes, pipes_capacity + * - [*pipes, *pipes + pipes_capacity) - array data + * - first_signalled_pipe, + * goldfish_pipe::prev_signalled, + * goldfish_pipe::next_signalled, + * goldfish_pipe::signalled_flags - all singnalled-related fields, + * in all allocated pipes + * - open_command_params - PIPE_CMD_OPEN-related buffers + * + * It looks like a lot of different fields, but the trick is that the only + * operation that happens often is the signalled pipes array manipulation. + * That's why it's OK for now to keep the rest of the fields under the same + * lock. If we notice too much contention because of PIPE_CMD_OPEN, + * then we should add a separate lock there. + */ + spinlock_t lock; + + /* + * Array of the pipes of |pipes_capacity| elements, + * indexed by goldfish_pipe::id + */ + struct goldfish_pipe **pipes; + u32 pipes_capacity; + + /* Pointers to the buffers host uses for interaction with this driver */ + struct goldfish_pipe_dev_buffers *buffers; + + /* Head of a doubly linked list of signalled pipes */ + struct goldfish_pipe *first_signalled_pipe; + + /* Some device-specific data */ + int irq; + int version; + unsigned char __iomem *base; + + /* v1-specific access parameters */ + struct access_params *aps; +}; + +extern struct goldfish_pipe_dev pipe_dev[1]; + +#endif /* GOLDFISH_PIPE_H */ diff --git a/drivers/platform/goldfish/goldfish_pipe_v2.c b/drivers/platform/goldfish/goldfish_pipe_v2.c new file mode 100644 index 000000000000..bdb039bc7dde --- /dev/null +++ b/drivers/platform/goldfish/goldfish_pipe_v2.c @@ -0,0 +1,888 @@ +/* + * Copyright (C) 2012 Intel, Inc. + * Copyright (C) 2013 Intel, Inc. + * Copyright (C) 2014 Linaro Limited + * Copyright (C) 2011-2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* This source file contains the implementation of a special device driver + * that intends to provide a *very* fast communication channel between the + * guest system and the QEMU emulator. + * + * Usage from the guest is simply the following (error handling simplified): + * + * int fd = open("/dev/qemu_pipe",O_RDWR); + * .... write() or read() through the pipe. + * + * This driver doesn't deal with the exact protocol used during the session. + * It is intended to be as simple as something like: + * + * // do this _just_ after opening the fd to connect to a specific + * // emulator service. + * const char* msg = ""; + * if (write(fd, msg, strlen(msg)+1) < 0) { + * ... could not connect to service + * close(fd); + * } + * + * // after this, simply read() and write() to communicate with the + * // service. Exact protocol details left as an exercise to the reader. + * + * This driver is very fast because it doesn't copy any data through + * intermediate buffers, since the emulator is capable of translating + * guest user addresses into host ones. + * + * Note that we must however ensure that each user page involved in the + * exchange is properly mapped during a transfer. + */ + +#include "goldfish_pipe.h" + + +/* + * Update this when something changes in the driver's behavior so the host + * can benefit from knowing it + */ +enum { + PIPE_DRIVER_VERSION = 2, + PIPE_CURRENT_DEVICE_VERSION = 2 +}; + +/* + * IMPORTANT: The following constants must match the ones used and defined + * in external/qemu/hw/goldfish_pipe.c in the Android source tree. + */ + +/* List of bitflags returned in status of CMD_POLL command */ +enum PipePollFlags { + PIPE_POLL_IN = 1 << 0, + PIPE_POLL_OUT = 1 << 1, + PIPE_POLL_HUP = 1 << 2 +}; + +/* Possible status values used to signal errors - see goldfish_pipe_error_convert */ +enum PipeErrors { + PIPE_ERROR_INVAL = -1, + PIPE_ERROR_AGAIN = -2, + PIPE_ERROR_NOMEM = -3, + PIPE_ERROR_IO = -4 +}; + +/* Bit-flags used to signal events from the emulator */ +enum PipeWakeFlags { + PIPE_WAKE_CLOSED = 1 << 0, /* emulator closed pipe */ + PIPE_WAKE_READ = 1 << 1, /* pipe can now be read from */ + PIPE_WAKE_WRITE = 1 << 2 /* pipe can now be written to */ +}; + +/* Bit flags for the 'flags' field */ +enum PipeFlagsBits { + BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */ + BIT_WAKE_ON_WRITE = 1, /* want to be woken on writes */ + BIT_WAKE_ON_READ = 2, /* want to be woken on reads */ +}; + +enum PipeRegs { + PIPE_REG_CMD = 0, + + PIPE_REG_SIGNAL_BUFFER_HIGH = 4, + PIPE_REG_SIGNAL_BUFFER = 8, + PIPE_REG_SIGNAL_BUFFER_COUNT = 12, + + PIPE_REG_OPEN_BUFFER_HIGH = 20, + PIPE_REG_OPEN_BUFFER = 24, + + PIPE_REG_VERSION = 36, + + PIPE_REG_GET_SIGNALLED = 48, +}; + +enum PipeCmdCode { + PIPE_CMD_OPEN = 1, /* to be used by the pipe device itself */ + PIPE_CMD_CLOSE, + PIPE_CMD_POLL, + PIPE_CMD_WRITE, + PIPE_CMD_WAKE_ON_WRITE, + PIPE_CMD_READ, + PIPE_CMD_WAKE_ON_READ, + + /* + * TODO(zyy): implement a deferred read/write execution to allow parallel + * processing of pipe operations on the host. + */ + PIPE_CMD_WAKE_ON_DONE_IO, +}; + +enum { + MAX_BUFFERS_PER_COMMAND = 336, + MAX_SIGNALLED_PIPES = 64, + INITIAL_PIPES_CAPACITY = 64 +}; + +struct goldfish_pipe_dev; +struct goldfish_pipe; +struct goldfish_pipe_command; + +/* A per-pipe command structure, shared with the host */ +struct goldfish_pipe_command { + s32 cmd; /* PipeCmdCode, guest -> host */ + s32 id; /* pipe id, guest -> host */ + s32 status; /* command execution status, host -> guest */ + s32 reserved; /* to pad to 64-bit boundary */ + union { + /* Parameters for PIPE_CMD_{READ,WRITE} */ + struct { + u32 buffers_count; /* number of buffers, guest -> host */ + s32 consumed_size; /* number of consumed bytes, host -> guest */ + u64 ptrs[MAX_BUFFERS_PER_COMMAND]; /* buffer pointers, guest -> host */ + u32 sizes[MAX_BUFFERS_PER_COMMAND]; /* buffer sizes, guest -> host */ + } rw_params; + }; +}; + +/* A single signalled pipe information */ +struct signalled_pipe_buffer { + u32 id; + u32 flags; +}; + +/* Parameters for the PIPE_CMD_OPEN command */ +struct open_command_param { + u64 command_buffer_ptr; + u32 rw_params_max_count; +}; + +/* Device-level set of buffers shared with the host */ +struct goldfish_pipe_dev_buffers { + struct open_command_param open_command_params; + struct signalled_pipe_buffer signalled_pipe_buffers[MAX_SIGNALLED_PIPES]; +}; + +/* This data type models a given pipe instance */ +struct goldfish_pipe { + u32 id; /* pipe ID - index into goldfish_pipe_dev::pipes array */ + unsigned long flags; /* The wake flags pipe is waiting for + * Note: not protected with any lock, uses atomic operations + * and barriers to make it thread-safe. + */ + unsigned long signalled_flags; /* wake flags host have signalled, + * - protected by goldfish_pipe_dev::lock */ + + struct goldfish_pipe_command *command_buffer; /* A pointer to command buffer */ + + /* doubly linked list of signalled pipes, protected by goldfish_pipe_dev::lock */ + struct goldfish_pipe *prev_signalled; + struct goldfish_pipe *next_signalled; + + /* + * A pipe's own lock. Protects the following: + * - *command_buffer - makes sure a command can safely write its parameters + * to the host and read the results back. + */ + struct mutex lock; + + wait_queue_head_t wake_queue; /* A wake queue for sleeping until host signals an event */ + struct goldfish_pipe_dev *dev; /* Pointer to the parent goldfish_pipe_dev instance */ +}; + +struct goldfish_pipe_dev pipe_dev[1] = {}; + +static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd) +{ + pipe->command_buffer->cmd = cmd; + pipe->command_buffer->status = PIPE_ERROR_INVAL; /* failure by default */ + writel(pipe->id, pipe->dev->base + PIPE_REG_CMD); + return pipe->command_buffer->status; +} + +static int goldfish_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd) +{ + int status; + if (mutex_lock_interruptible(&pipe->lock)) + return PIPE_ERROR_IO; + status = goldfish_cmd_locked(pipe, cmd); + mutex_unlock(&pipe->lock); + return status; +} + +/* + * This function converts an error code returned by the emulator through + * the PIPE_REG_STATUS i/o register into a valid negative errno value. + */ +static int goldfish_pipe_error_convert(int status) +{ + switch (status) { + case PIPE_ERROR_AGAIN: + return -EAGAIN; + case PIPE_ERROR_NOMEM: + return -ENOMEM; + case PIPE_ERROR_IO: + return -EIO; + default: + return -EINVAL; + } +} + +static int pin_user_pages(unsigned long first_page, unsigned long last_page, + unsigned last_page_size, int is_write, + struct page *pages[MAX_BUFFERS_PER_COMMAND], unsigned *iter_last_page_size) +{ + int ret; + int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1; + if (requested_pages > MAX_BUFFERS_PER_COMMAND) { + requested_pages = MAX_BUFFERS_PER_COMMAND; + *iter_last_page_size = PAGE_SIZE; + } else { + *iter_last_page_size = last_page_size; + } + + ret = get_user_pages_fast( + first_page, requested_pages, !is_write, pages); + if (ret <= 0) + return -EFAULT; + if (ret < requested_pages) + *iter_last_page_size = PAGE_SIZE; + return ret; + +} + +static void release_user_pages(struct page **pages, int pages_count, + int is_write, s32 consumed_size) +{ + int i; + for (i = 0; i < pages_count; i++) { + if (!is_write && consumed_size > 0) { + set_page_dirty(pages[i]); + } + put_page(pages[i]); + } +} + +/* Populate the call parameters, merging adjacent pages together */ +static void populate_rw_params( + struct page **pages, int pages_count, + unsigned long address, unsigned long address_end, + unsigned long first_page, unsigned long last_page, + unsigned iter_last_page_size, int is_write, + struct goldfish_pipe_command *command) +{ + /* + * Process the first page separately - it's the only page that + * needs special handling for its start address. + */ + unsigned long xaddr = page_to_phys(pages[0]); + unsigned long xaddr_prev = xaddr; + int buffer_idx = 0; + int i = 1; + int size_on_page = first_page == last_page + ? (int)(address_end - address) + : (PAGE_SIZE - (address & ~PAGE_MASK)); + command->rw_params.ptrs[0] = (u64)(xaddr | (address & ~PAGE_MASK)); + command->rw_params.sizes[0] = size_on_page; + for (; i < pages_count; ++i) { + xaddr = page_to_phys(pages[i]); + size_on_page = (i == pages_count - 1) ? iter_last_page_size : PAGE_SIZE; + if (xaddr == xaddr_prev + PAGE_SIZE) { + command->rw_params.sizes[buffer_idx] += size_on_page; + } else { + ++buffer_idx; + command->rw_params.ptrs[buffer_idx] = (u64)xaddr; + command->rw_params.sizes[buffer_idx] = size_on_page; + } + xaddr_prev = xaddr; + } + command->rw_params.buffers_count = buffer_idx + 1; +} + +static int transfer_max_buffers(struct goldfish_pipe* pipe, + unsigned long address, unsigned long address_end, int is_write, + unsigned long last_page, unsigned int last_page_size, + s32* consumed_size, int* status) +{ + struct page *pages[MAX_BUFFERS_PER_COMMAND]; + unsigned long first_page = address & PAGE_MASK; + unsigned int iter_last_page_size; + int pages_count = pin_user_pages(first_page, last_page, + last_page_size, is_write, + pages, &iter_last_page_size); + if (pages_count < 0) + return pages_count; + + /* Serialize access to the pipe command buffers */ + if (mutex_lock_interruptible(&pipe->lock)) + return -ERESTARTSYS; + + populate_rw_params(pages, pages_count, address, address_end, + first_page, last_page, iter_last_page_size, is_write, + pipe->command_buffer); + + /* Transfer the data */ + *status = goldfish_cmd_locked(pipe, + is_write ? PIPE_CMD_WRITE : PIPE_CMD_READ); + + *consumed_size = pipe->command_buffer->rw_params.consumed_size; + + mutex_unlock(&pipe->lock); + + release_user_pages(pages, pages_count, is_write, *consumed_size); + + return 0; +} + +static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write) +{ + u32 wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ; + set_bit(wakeBit, &pipe->flags); + + /* Tell the emulator we're going to wait for a wake event */ + (void)goldfish_cmd(pipe, + is_write ? PIPE_CMD_WAKE_ON_WRITE : PIPE_CMD_WAKE_ON_READ); + + while (test_bit(wakeBit, &pipe->flags)) { + if (wait_event_interruptible( + pipe->wake_queue, + !test_bit(wakeBit, &pipe->flags))) + return -ERESTARTSYS; + + if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) + return -EIO; + } + + return 0; +} + +static ssize_t goldfish_pipe_read_write(struct file *filp, + char __user *buffer, size_t bufflen, int is_write) +{ + struct goldfish_pipe *pipe = filp->private_data; + int count = 0, ret = -EINVAL; + unsigned long address, address_end, last_page; + unsigned int last_page_size; + + /* If the emulator already closed the pipe, no need to go further */ + if (unlikely(test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))) + return -EIO; + /* Null reads or writes succeeds */ + if (unlikely(bufflen == 0)) + return 0; + /* Check the buffer range for access */ + if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ, + buffer, bufflen))) + return -EFAULT; + + address = (unsigned long)buffer; + address_end = address + bufflen; + last_page = (address_end - 1) & PAGE_MASK; + last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1; + + while (address < address_end) { + s32 consumed_size; + int status; + ret = transfer_max_buffers(pipe, address, address_end, is_write, + last_page, last_page_size, &consumed_size, &status); + if (ret < 0) + break; + + if (consumed_size > 0) { + /* No matter what's the status, we've transfered something */ + count += consumed_size; + address += consumed_size; + } + if (status > 0) + continue; + if (status == 0) { + /* EOF */ + ret = 0; + break; + } + if (count > 0) { + /* + * An error occured, but we already transfered + * something on one of the previous iterations. + * Just return what we already copied and log this + * err. + */ + if (status != PIPE_ERROR_AGAIN) + pr_info_ratelimited("goldfish_pipe: backend error %d on %s\n", + status, is_write ? "write" : "read"); + break; + } + + /* + * If the error is not PIPE_ERROR_AGAIN, or if we are in + * non-blocking mode, just return the error code. + */ + if (status != PIPE_ERROR_AGAIN || (filp->f_flags & O_NONBLOCK) != 0) { + ret = goldfish_pipe_error_convert(status); + break; + } + + status = wait_for_host_signal(pipe, is_write); + if (status < 0) + return status; + } + + if (count > 0) + return count; + return ret; +} + +static ssize_t goldfish_pipe_read(struct file *filp, char __user *buffer, + size_t bufflen, loff_t *ppos) +{ + return goldfish_pipe_read_write(filp, buffer, bufflen, /* is_write */ 0); +} + +static ssize_t goldfish_pipe_write(struct file *filp, + const char __user *buffer, size_t bufflen, + loff_t *ppos) +{ + return goldfish_pipe_read_write(filp, + /* cast away the const */(char __user *)buffer, bufflen, + /* is_write */ 1); +} + +static unsigned int goldfish_pipe_poll(struct file *filp, poll_table *wait) +{ + struct goldfish_pipe *pipe = filp->private_data; + unsigned int mask = 0; + int status; + + poll_wait(filp, &pipe->wake_queue, wait); + + status = goldfish_cmd(pipe, PIPE_CMD_POLL); + if (status < 0) { + return -ERESTARTSYS; + } + + if (status & PIPE_POLL_IN) + mask |= POLLIN | POLLRDNORM; + if (status & PIPE_POLL_OUT) + mask |= POLLOUT | POLLWRNORM; + if (status & PIPE_POLL_HUP) + mask |= POLLHUP; + if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)) + mask |= POLLERR; + + return mask; +} + +static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev, + u32 id, u32 flags) +{ + struct goldfish_pipe *pipe; + + BUG_ON(id >= dev->pipes_capacity); + + pipe = dev->pipes[id]; + if (!pipe) + return; + pipe->signalled_flags |= flags; + + if (pipe->prev_signalled || pipe->next_signalled + || dev->first_signalled_pipe == pipe) + return; /* already in the list */ + pipe->next_signalled = dev->first_signalled_pipe; + if (dev->first_signalled_pipe) { + dev->first_signalled_pipe->prev_signalled = pipe; + } + dev->first_signalled_pipe = pipe; +} + +static void signalled_pipes_remove_locked(struct goldfish_pipe_dev *dev, + struct goldfish_pipe *pipe) { + if (pipe->prev_signalled) + pipe->prev_signalled->next_signalled = pipe->next_signalled; + if (pipe->next_signalled) + pipe->next_signalled->prev_signalled = pipe->prev_signalled; + if (pipe == dev->first_signalled_pipe) + dev->first_signalled_pipe = pipe->next_signalled; + pipe->prev_signalled = NULL; + pipe->next_signalled = NULL; +} + +static struct goldfish_pipe *signalled_pipes_pop_front(struct goldfish_pipe_dev *dev, + int *wakes) +{ + struct goldfish_pipe *pipe; + unsigned long flags; + spin_lock_irqsave(&dev->lock, flags); + + pipe = dev->first_signalled_pipe; + if (pipe) { + *wakes = pipe->signalled_flags; + pipe->signalled_flags = 0; + /* + * This is an optimized version of signalled_pipes_remove_locked() - + * we want to make it as fast as possible to wake the sleeping pipe + * operations faster + */ + dev->first_signalled_pipe = pipe->next_signalled; + if (dev->first_signalled_pipe) + dev->first_signalled_pipe->prev_signalled = NULL; + pipe->next_signalled = NULL; + } + + spin_unlock_irqrestore(&dev->lock, flags); + return pipe; +} + +static void goldfish_interrupt_task(unsigned long unused) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + /* Iterate over the signalled pipes and wake them one by one */ + struct goldfish_pipe *pipe; + int wakes; + while ((pipe = signalled_pipes_pop_front(dev, &wakes)) != NULL) { + if (wakes & PIPE_WAKE_CLOSED) { + pipe->flags = 1 << BIT_CLOSED_ON_HOST; + } else { + if (wakes & PIPE_WAKE_READ) + clear_bit(BIT_WAKE_ON_READ, &pipe->flags); + if (wakes & PIPE_WAKE_WRITE) + clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags); + } + /* + * wake_up_interruptible() implies a write barrier, so don't explicitly + * add another one here. + */ + wake_up_interruptible(&pipe->wake_queue); + } +} +DECLARE_TASKLET(goldfish_interrupt_tasklet, goldfish_interrupt_task, 0); + +/* + * The general idea of the interrupt handling: + * + * 1. device raises an interrupt if there's at least one signalled pipe + * 2. IRQ handler reads the signalled pipes and their count from the device + * 3. device writes them into a shared buffer and returns the count + * it only resets the IRQ if it has returned all signalled pipes, + * otherwise it leaves it raised, so IRQ handler will be called + * again for the next chunk + * 4. IRQ handler adds all returned pipes to the device's signalled pipes list + * 5. IRQ handler launches a tasklet to process the signalled pipes from the + * list in a separate context + */ +static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id) +{ + u32 count; + u32 i; + unsigned long flags; + struct goldfish_pipe_dev *dev = dev_id; + if (dev != pipe_dev) + return IRQ_NONE; + + /* Request the signalled pipes from the device */ + spin_lock_irqsave(&dev->lock, flags); + + count = readl(dev->base + PIPE_REG_GET_SIGNALLED); + if (count == 0) { + spin_unlock_irqrestore(&dev->lock, flags); + return IRQ_NONE; + } + if (count > MAX_SIGNALLED_PIPES) + count = MAX_SIGNALLED_PIPES; + + for (i = 0; i < count; ++i) + signalled_pipes_add_locked(dev, + dev->buffers->signalled_pipe_buffers[i].id, + dev->buffers->signalled_pipe_buffers[i].flags); + + spin_unlock_irqrestore(&dev->lock, flags); + + tasklet_schedule(&goldfish_interrupt_tasklet); + return IRQ_HANDLED; +} + +static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev) +{ + int id; + for (id = 0; id < dev->pipes_capacity; ++id) + if (!dev->pipes[id]) + return id; + + { + /* Reallocate the array */ + u32 new_capacity = 2 * dev->pipes_capacity; + struct goldfish_pipe **pipes = + kcalloc(new_capacity, sizeof(*pipes), GFP_KERNEL); + if (!pipes) + return -ENOMEM; + memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity); + kfree(dev->pipes); + dev->pipes = pipes; + id = dev->pipes_capacity; + dev->pipes_capacity = new_capacity; + } + return id; +} + +/** + * goldfish_pipe_open - open a channel to the AVD + * @inode: inode of device + * @file: file struct of opener + * + * Create a new pipe link between the emulator and the use application. + * Each new request produces a new pipe. + * + * Note: we use the pipe ID as a mux. All goldfish emulations are 32bit + * right now so this is fine. A move to 64bit will need this addressing + */ +static int goldfish_pipe_open(struct inode *inode, struct file *file) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + unsigned long flags; + int id; + int status; + + /* Allocate new pipe kernel object */ + struct goldfish_pipe *pipe = kzalloc(sizeof(*pipe), GFP_KERNEL); + if (pipe == NULL) + return -ENOMEM; + + pipe->dev = dev; + mutex_init(&pipe->lock); + init_waitqueue_head(&pipe->wake_queue); + + /* + * Command buffer needs to be allocated on its own page to make sure it is + * physically contiguous in host's address space. + */ + pipe->command_buffer = + (struct goldfish_pipe_command*)__get_free_page(GFP_KERNEL); + if (!pipe->command_buffer) { + status = -ENOMEM; + goto err_pipe; + } + + spin_lock_irqsave(&dev->lock, flags); + + id = get_free_pipe_id_locked(dev); + if (id < 0) { + status = id; + goto err_id_locked; + } + + dev->pipes[id] = pipe; + pipe->id = id; + pipe->command_buffer->id = id; + + /* Now tell the emulator we're opening a new pipe. */ + dev->buffers->open_command_params.rw_params_max_count = + MAX_BUFFERS_PER_COMMAND; + dev->buffers->open_command_params.command_buffer_ptr = + (u64)(unsigned long)__pa(pipe->command_buffer); + status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN); + spin_unlock_irqrestore(&dev->lock, flags); + if (status < 0) + goto err_cmd; + /* All is done, save the pipe into the file's private data field */ + file->private_data = pipe; + return 0; + +err_cmd: + spin_lock_irqsave(&dev->lock, flags); + dev->pipes[id] = NULL; +err_id_locked: + spin_unlock_irqrestore(&dev->lock, flags); + free_page((unsigned long)pipe->command_buffer); +err_pipe: + kfree(pipe); + return status; +} + +static int goldfish_pipe_release(struct inode *inode, struct file *filp) +{ + unsigned long flags; + struct goldfish_pipe *pipe = filp->private_data; + struct goldfish_pipe_dev *dev = pipe->dev; + + /* The guest is closing the channel, so tell the emulator right now */ + (void)goldfish_cmd(pipe, PIPE_CMD_CLOSE); + + spin_lock_irqsave(&dev->lock, flags); + dev->pipes[pipe->id] = NULL; + signalled_pipes_remove_locked(dev, pipe); + spin_unlock_irqrestore(&dev->lock, flags); + + filp->private_data = NULL; + free_page((unsigned long)pipe->command_buffer); + kfree(pipe); + return 0; +} + +static const struct file_operations goldfish_pipe_fops = { + .owner = THIS_MODULE, + .read = goldfish_pipe_read, + .write = goldfish_pipe_write, + .poll = goldfish_pipe_poll, + .open = goldfish_pipe_open, + .release = goldfish_pipe_release, +}; + +static struct miscdevice goldfish_pipe_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "goldfish_pipe", + .fops = &goldfish_pipe_fops, +}; + +static int goldfish_pipe_device_init_v2(struct platform_device *pdev) +{ + char *page; + struct goldfish_pipe_dev *dev = pipe_dev; + int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt, + IRQF_SHARED, "goldfish_pipe", dev); + if (err) { + dev_err(&pdev->dev, "unable to allocate IRQ for v2\n"); + return err; + } + + err = misc_register(&goldfish_pipe_dev); + if (err) { + dev_err(&pdev->dev, "unable to register v2 device\n"); + return err; + } + + dev->first_signalled_pipe = NULL; + dev->pipes_capacity = INITIAL_PIPES_CAPACITY; + dev->pipes = kcalloc(dev->pipes_capacity, sizeof(*dev->pipes), GFP_KERNEL); + if (!dev->pipes) + return -ENOMEM; + + /* + * We're going to pass two buffers, open_command_params and + * signalled_pipe_buffers, to the host. This means each of those buffers + * needs to be contained in a single physical page. The easiest choice is + * to just allocate a page and place the buffers in it. + */ + BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE); + page = (char*)__get_free_page(GFP_KERNEL); + if (!page) { + kfree(dev->pipes); + return -ENOMEM; + } + dev->buffers = (struct goldfish_pipe_dev_buffers*)page; + + /* Send the buffer addresses to the host */ + { + u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers); + writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH); + writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_SIGNAL_BUFFER); + writel((u32)MAX_SIGNALLED_PIPES, dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT); + + paddr = __pa(&dev->buffers->open_command_params); + writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_OPEN_BUFFER_HIGH); + writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_OPEN_BUFFER); + } + return 0; +} + +static void goldfish_pipe_device_deinit_v2(struct platform_device *pdev) { + struct goldfish_pipe_dev *dev = pipe_dev; + misc_deregister(&goldfish_pipe_dev); + kfree(dev->pipes); + free_page((unsigned long)dev->buffers); +} + +static int goldfish_pipe_probe(struct platform_device *pdev) +{ + int err; + struct resource *r; + struct goldfish_pipe_dev *dev = pipe_dev; + + BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); + + /* not thread safe, but this should not happen */ + WARN_ON(dev->base != NULL); + + spin_lock_init(&dev->lock); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (r == NULL || resource_size(r) < PAGE_SIZE) { + dev_err(&pdev->dev, "can't allocate i/o page\n"); + return -EINVAL; + } + dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE); + if (dev->base == NULL) { + dev_err(&pdev->dev, "ioremap failed\n"); + return -EINVAL; + } + + r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (r == NULL) { + err = -EINVAL; + goto error; + } + dev->irq = r->start; + + /* + * Exchange the versions with the host device + * + * Note: v1 driver used to not report its version, so we write it before + * reading device version back: this allows the host implementation to + * detect the old driver (if there was no version write before read). + */ + writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION); + dev->version = readl(dev->base + PIPE_REG_VERSION); + if (dev->version < PIPE_CURRENT_DEVICE_VERSION) { + /* initialize the old device version */ + err = goldfish_pipe_device_init_v1(pdev); + } else { + /* Host device supports the new interface */ + err = goldfish_pipe_device_init_v2(pdev); + } + if (!err) + return 0; + +error: + dev->base = NULL; + return err; +} + +static int goldfish_pipe_remove(struct platform_device *pdev) +{ + struct goldfish_pipe_dev *dev = pipe_dev; + if (dev->version < PIPE_CURRENT_DEVICE_VERSION) + goldfish_pipe_device_deinit_v1(pdev); + else + goldfish_pipe_device_deinit_v2(pdev); + dev->base = NULL; + return 0; +} + +static const struct acpi_device_id goldfish_pipe_acpi_match[] = { + { "GFSH0003", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match); + +static const struct of_device_id goldfish_pipe_of_match[] = { + { .compatible = "google,android-pipe", }, + {}, +}; +MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match); + +static struct platform_driver goldfish_pipe_driver = { + .probe = goldfish_pipe_probe, + .remove = goldfish_pipe_remove, + .driver = { + .name = "goldfish_pipe", + .of_match_table = goldfish_pipe_of_match, + .acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match), + } +}; + +module_platform_driver(goldfish_pipe_driver); +MODULE_AUTHOR("David Turner "); +MODULE_LICENSE("GPL"); -- GitLab From b95ca37b664642e176b8497fad35ffac3a27f5be Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 18 Nov 2016 07:26:19 +0100 Subject: [PATCH 1079/1442] ANDROID: goldfish_pipe: fix call_kern.cocci warnings Function get_free_pipe_id_locked called on line 671 inside lock on line 669 but uses GFP_KERNEL. Replace with GFP_ATOMIC. Generated by: scripts/coccinelle/locks/call_kern.cocci Change-Id: I49bd59abdf4533dbaa826f48c266883031179c7e CC: Yurii Zubrytskyi Signed-off-by: Julia Lawall Signed-off-by: Fengguang Wu Signed-off-by: Guenter Roeck --- drivers/platform/goldfish/goldfish_pipe_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/goldfish/goldfish_pipe_v2.c b/drivers/platform/goldfish/goldfish_pipe_v2.c index bdb039bc7dde..ad373ed36555 100644 --- a/drivers/platform/goldfish/goldfish_pipe_v2.c +++ b/drivers/platform/goldfish/goldfish_pipe_v2.c @@ -616,7 +616,8 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev) /* Reallocate the array */ u32 new_capacity = 2 * dev->pipes_capacity; struct goldfish_pipe **pipes = - kcalloc(new_capacity, sizeof(*pipes), GFP_KERNEL); + kcalloc(new_capacity, sizeof(*pipes), + GFP_ATOMIC); if (!pipes) return -ENOMEM; memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity); -- GitLab From ac95768af0ebbfa7c62bf7d55a4dee5c16406775 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Fri, 18 Nov 2016 11:09:02 -0800 Subject: [PATCH 1080/1442] ANDROID: goldfish: goldfish_pipe: fix locking errors If the get_user_pages_fast() call in goldfish_pipe_read_write() failed, it would return while still holding pipe->lock. goldfish_pipe_read_write() later releases and tries to re-acquire pipe->lock. If the re-acquire call failed, goldfish_pipe_read_write() would try unlock pipe->lock on exit anyway. This fixes the smatch messages: drivers/platform/goldfish/goldfish_pipe.c:392 goldfish_pipe_read_write() error: double unlock 'mutex:&pipe->lock' drivers/platform/goldfish/goldfish_pipe.c:397 goldfish_pipe_read_write() warn: inconsistent returns 'mutex:&pipe->lock'. Change-Id: Ifd06a76b32027ca451a001704ade0c5440ed69c4 Signed-off-by: Greg Hackmann --- drivers/platform/goldfish/goldfish_pipe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 1a82e9b73eeb..91e0a5645799 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -272,11 +272,13 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, if (ret == 0) { DPRINT("%s: error: (requested pages == 0) (wanted %d)\n", __FUNCTION__, requested_pages); + mutex_unlock(&pipe->lock); return ret; } if (ret < 0) { DPRINT("%s: (requested pages < 0) %d \n", __FUNCTION__, requested_pages); + mutex_unlock(&pipe->lock); return ret; } -- GitLab From 192892b2b3a397bf1a73a97f66fa0eed14d5a21a Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 18 Nov 2016 11:40:40 -0800 Subject: [PATCH 1081/1442] ANDROID: goldfish_pipe: fix allmodconfig build tree: https://android.googlesource.com/kernel/common android-4.4 head: 6297c6ba0d217d5b0998738fbfaff2f04cad77e6 commit: bc43565e1ac5ba3f204886a2275726bb4c3d44e6 [18/20] ANDROID: goldfish_pipe: An implementation of more parallel pipe config: i386-randconfig-s1-201646 (attached as .config) compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901 reproduce: git checkout bc43565e1ac5ba3f204886a2275726bb4c3d44e6 # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): >> ERROR: "goldfish_pipe_device_deinit_v1" [drivers/platform/goldfish/goldfish_pipe_v2.ko] undefined! >> ERROR: "goldfish_pipe_device_init_v1" [drivers/platform/goldfish/goldfish_pipe_v2.ko] undefined! >> ERROR: "pipe_dev" [drivers/platform/goldfish/goldfish_pipe.ko] undefined! Change-Id: Ibd51441edf82e6bb6824acc05ea795570cc374e8 --- drivers/platform/goldfish/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/goldfish/Makefile b/drivers/platform/goldfish/Makefile index e53ae2fc717b..277a820ee4e1 100644 --- a/drivers/platform/goldfish/Makefile +++ b/drivers/platform/goldfish/Makefile @@ -2,4 +2,5 @@ # Makefile for Goldfish platform specific drivers # obj-$(CONFIG_GOLDFISH_BUS) += pdev_bus.o -obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe.o goldfish_pipe_v2.o +obj-$(CONFIG_GOLDFISH_PIPE) += goldfish_pipe_all.o +goldfish_pipe_all-objs := goldfish_pipe.o goldfish_pipe_v2.o -- GitLab From 2a061a87b05f8c0167fbf9944b307b530f749a3e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 8 Sep 2015 17:30:52 -0700 Subject: [PATCH 1082/1442] CHROMIUM: fix warning when releasing active sync point Userspace can close the sync device while there are still active fence points, in which case kernel produces the following warning: [ 43.853176] ------------[ cut here ]------------ [ 43.857834] WARNING: CPU: 0 PID: 892 at /mnt/host/source/src/third_party/kernel/v3.18/drivers/staging/android/sync.c:439 android_fence_release+0x88/0x104() [ 43.871741] CPU: 0 PID: 892 Comm: Binder_5 Tainted: G U 3.18.0-07661-g0550ce9 #1 [ 43.880176] Hardware name: Google Tegra210 Smaug Rev 1+ (DT) [ 43.885834] Call trace: [ 43.888294] [] dump_backtrace+0x0/0x10c [ 43.893697] [] show_stack+0x10/0x1c [ 43.898756] [] dump_stack+0x74/0xb8 [ 43.903814] [] warn_slowpath_common+0x84/0xb0 [ 43.909736] [] warn_slowpath_null+0x14/0x20 [ 43.915482] [] android_fence_release+0x84/0x104 [ 43.921582] [] fence_release+0x104/0x134 [ 43.927066] [] sync_fence_free+0x74/0x9c [ 43.932552] [] sync_fence_release+0x34/0x48 [ 43.938304] [] __fput+0x100/0x1b8 [ 43.943185] [] ____fput+0x8/0x14 [ 43.947982] [] task_work_run+0xb0/0xe4 [ 43.953297] [] do_notify_resume+0x44/0x5c [ 43.958867] ---[ end trace 5a2aa4027cc5d171 ]--- Let's fix it by introducing a new optional callback (disable_signaling) to fence operations so that drivers can do proper clean ups when we remove last callback for given fence. BUG=chrome-os-partner:40303 TEST=Boot Smaug and observe that warning is gone. Change-Id: I05c34dcf74438c28405438c7ead0706b1f810fff Signed-off-by: Dmitry Torokhov Reviewed-on: https://chromium-review.googlesource.com/303409 Reviewed-by: Andrew Bresticker [AmitP: Refactored original changes by renaming android_fence_disable_signaling to timeline_fence_disable_signaling so as to align with the upstream naming convention]. Signed-off-by: Amit Pundir --- drivers/dma-buf/fence.c | 6 +++++- drivers/dma-buf/sw_sync.c | 8 ++++++++ include/linux/fence.h | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index 4d51f9e83fa8..2453e076ee36 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -304,8 +304,12 @@ fence_remove_callback(struct fence *fence, struct fence_cb *cb) spin_lock_irqsave(fence->lock, flags); ret = !list_empty(&cb->node); - if (ret) + if (ret) { list_del_init(&cb->node); + if (list_empty(&fence->cb_list)) + if (fence->ops->disable_signaling) + fence->ops->disable_signaling(fence); + } spin_unlock_irqrestore(fence->lock, flags); diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 62e8e6dc7953..454d3b3df512 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -234,6 +234,13 @@ static bool timeline_fence_enable_signaling(struct fence *fence) return true; } +static void timeline_fence_disable_signaling(struct fence *fence) +{ + struct sync_pt *pt = container_of(fence, struct sync_pt, base); + + list_del_init(&pt->active_list); +} + static void timeline_fence_value_str(struct fence *fence, char *str, int size) { @@ -252,6 +259,7 @@ static const struct fence_ops timeline_fence_ops = { .get_driver_name = timeline_fence_get_driver_name, .get_timeline_name = timeline_fence_get_timeline_name, .enable_signaling = timeline_fence_enable_signaling, + .disable_signaling = timeline_fence_disable_signaling, .signaled = timeline_fence_signaled, .wait = fence_default_wait, .release = timeline_fence_release, diff --git a/include/linux/fence.h b/include/linux/fence.h index 0d763053f97a..8d7265fe049e 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -108,6 +108,7 @@ struct fence_cb { * @get_driver_name: returns the driver name. * @get_timeline_name: return the name of the context this fence belongs to. * @enable_signaling: enable software signaling of fence. + * @disable_signaling: disable software signaling of fence (optional). * @signaled: [optional] peek whether the fence is signaled, can be null. * @wait: custom wait implementation, or fence_default_wait. * @release: [optional] called on destruction of fence, can be null @@ -167,6 +168,7 @@ struct fence_ops { const char * (*get_driver_name)(struct fence *fence); const char * (*get_timeline_name)(struct fence *fence); bool (*enable_signaling)(struct fence *fence); + void (*disable_signaling)(struct fence *fence); bool (*signaled)(struct fence *fence); signed long (*wait)(struct fence *fence, bool intr, signed long timeout); void (*release)(struct fence *fence); -- GitLab From e2d88788d5b048b40d6e9ab80024a017edafc569 Mon Sep 17 00:00:00 2001 From: Mohan Srinivasan Date: Wed, 14 Dec 2016 15:55:36 -0800 Subject: [PATCH 1083/1442] ANDROID: MMC/UFS IO Latency Histograms. This patch adds a new sysfs node (latency_hist) and reports IO (svc time) latency histograms. Disabled by default, can be enabled by echoing 0 into latency_hist, stats can be cleared by writing 2 into latency_hist. This commit fixes the 32 bit build breakage in the previous commit. Tested on both 32 bit and 64 bit arm devices. (Merged from android4.4-common). Bug: 30677035 Change-Id: I9403ea093f4cd54ebae08f12a11d01ef153118d0 Signed-off-by: Mohan Srinivasan --- block/blk-core.c | 84 +++++++++++++++++++++++++++++++++++++++ drivers/mmc/core/core.c | 75 +++++++++++++++++++++++++++++++++- drivers/mmc/core/host.c | 10 ++++- drivers/mmc/core/host.h | 5 +++ drivers/scsi/ufs/ufshcd.c | 81 +++++++++++++++++++++++++++++++++++++ drivers/scsi/ufs/ufshcd.h | 2 + include/linux/blkdev.h | 76 +++++++++++++++++++++++++++++++++++ include/linux/mmc/core.h | 4 ++ include/linux/mmc/host.h | 6 +++ 9 files changed, 339 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 14d7c0740dc0..df9e160a5cdf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -40,6 +40,8 @@ #include "blk.h" #include "blk-mq.h" +#include + EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); @@ -3547,3 +3549,85 @@ int __init blk_dev_init(void) return 0; } + +/* + * Blk IO latency support. We want this to be as cheap as possible, so doing + * this lockless (and avoiding atomics), a few off by a few errors in this + * code is not harmful, and we don't want to do anything that is + * perf-impactful. + * TODO : If necessary, we can make the histograms per-cpu and aggregate + * them when printing them out. + */ +void +blk_zero_latency_hist(struct io_latency_state *s) +{ + memset(s->latency_y_axis_read, 0, + sizeof(s->latency_y_axis_read)); + memset(s->latency_y_axis_write, 0, + sizeof(s->latency_y_axis_write)); + s->latency_reads_elems = 0; + s->latency_writes_elems = 0; +} +EXPORT_SYMBOL(blk_zero_latency_hist); + +ssize_t +blk_latency_hist_show(struct io_latency_state *s, char *buf) +{ + int i; + int bytes_written = 0; + u_int64_t num_elem, elem; + int pct; + + num_elem = s->latency_reads_elems; + if (num_elem > 0) { + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "IO svc_time Read Latency Histogram (n = %llu):\n", + num_elem); + for (i = 0; + i < ARRAY_SIZE(latency_x_axis_us); + i++) { + elem = s->latency_y_axis_read[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t< %5lluus%15llu%15d%%\n", + latency_x_axis_us[i], + elem, pct); + } + /* Last element in y-axis table is overflow */ + elem = s->latency_y_axis_read[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t> %5dms%15llu%15d%%\n", 10, + elem, pct); + } + num_elem = s->latency_writes_elems; + if (num_elem > 0) { + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "IO svc_time Write Latency Histogram (n = %llu):\n", + num_elem); + for (i = 0; + i < ARRAY_SIZE(latency_x_axis_us); + i++) { + elem = s->latency_y_axis_write[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t< %5lluus%15llu%15d%%\n", + latency_x_axis_us[i], + elem, pct); + } + /* Last element in y-axis table is overflow */ + elem = s->latency_y_axis_write[i]; + pct = div64_u64(elem * 100, num_elem); + bytes_written += scnprintf(buf + bytes_written, + PAGE_SIZE - bytes_written, + "\t> %5dms%15llu%15d%%\n", 10, + elem, pct); + } + return bytes_written; +} +EXPORT_SYMBOL(blk_latency_hist_show); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 3305824f6e76..40ddc3e69a4d 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -201,6 +201,19 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) pr_debug("%s: %d bytes transferred: %d\n", mmc_hostname(host), mrq->data->bytes_xfered, mrq->data->error); +#ifdef CONFIG_BLOCK + if (mrq->lat_hist_enabled) { + ktime_t completion; + u_int64_t delta_us; + + completion = ktime_get(); + delta_us = ktime_us_delta(completion, + mrq->io_start); + blk_update_latency_hist(&host->io_lat_s, + (mrq->data->flags & MMC_DATA_READ), + delta_us); + } +#endif } if (mrq->stop) { @@ -699,8 +712,16 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host, } } - if (!err && areq) + if (!err && areq) { +#ifdef CONFIG_BLOCK + if (host->latency_hist_enabled) { + areq->mrq->io_start = ktime_get(); + areq->mrq->lat_hist_enabled = 1; + } else + areq->mrq->lat_hist_enabled = 0; +#endif start_err = __mmc_start_data_req(host, areq->mrq); + } if (host->areq) mmc_post_req(host, host->areq->mrq, 0); @@ -2051,7 +2072,7 @@ void mmc_init_erase(struct mmc_card *card) } static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, - unsigned int arg, unsigned int qty) + unsigned int arg, unsigned int qty) { unsigned int erase_timeout; @@ -3074,6 +3095,56 @@ static void __exit mmc_exit(void) mmc_unregister_bus(); } +#ifdef CONFIG_BLOCK +static ssize_t +latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + + return blk_latency_hist_show(&host->io_lat_s, buf); +} + +/* + * Values permitted 0, 1, 2. + * 0 -> Disable IO latency histograms (default) + * 1 -> Enable IO latency histograms + * 2 -> Zero out IO latency histograms + */ +static ssize_t +latency_hist_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + long value; + + if (kstrtol(buf, 0, &value)) + return -EINVAL; + if (value == BLK_IO_LAT_HIST_ZERO) + blk_zero_latency_hist(&host->io_lat_s); + else if (value == BLK_IO_LAT_HIST_ENABLE || + value == BLK_IO_LAT_HIST_DISABLE) + host->latency_hist_enabled = value; + return count; +} + +static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR, + latency_hist_show, latency_hist_store); + +void +mmc_latency_hist_sysfs_init(struct mmc_host *host) +{ + if (device_create_file(&host->class_dev, &dev_attr_latency_hist)) + dev_err(&host->class_dev, + "Failed to create latency_hist sysfs entry\n"); +} + +void +mmc_latency_hist_sysfs_exit(struct mmc_host *host) +{ + device_remove_file(&host->class_dev, &dev_attr_latency_hist); +} +#endif + subsys_initcall(mmc_init); module_exit(mmc_exit); diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 38bb255ebab5..348b58b77b3b 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -31,8 +31,6 @@ #include "slot-gpio.h" #include "pwrseq.h" -#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) - static DEFINE_IDA(mmc_host_ida); static DEFINE_SPINLOCK(mmc_host_lock); @@ -430,6 +428,10 @@ int mmc_add_host(struct mmc_host *host) mmc_add_host_debugfs(host); #endif +#ifdef CONFIG_BLOCK + mmc_latency_hist_sysfs_init(host); +#endif + mmc_start_host(host); if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY)) mmc_register_pm_notifier(host); @@ -457,6 +459,10 @@ void mmc_remove_host(struct mmc_host *host) mmc_remove_host_debugfs(host); #endif +#ifdef CONFIG_BLOCK + mmc_latency_hist_sysfs_exit(host); +#endif + device_del(&host->class_dev); led_trigger_unregister_simple(host->led); diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index 992bf5397633..bf38533406fd 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -12,6 +12,8 @@ #define _MMC_CORE_HOST_H #include +#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) + int mmc_register_host_class(void); void mmc_unregister_host_class(void); @@ -21,5 +23,8 @@ void mmc_retune_hold(struct mmc_host *host); void mmc_retune_release(struct mmc_host *host); int mmc_retune(struct mmc_host *host); +void mmc_latency_hist_sysfs_init(struct mmc_host *host); +void mmc_latency_hist_sysfs_exit(struct mmc_host *host); + #endif diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 05c745663c10..9e3177b9cda5 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "ufshcd.h" #include "ufs_quirks.h" #include "unipro.h" @@ -1451,6 +1452,17 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) clear_bit_unlock(tag, &hba->lrb_in_use); goto out; } + + /* IO svc time latency histogram */ + if (hba != NULL && cmd->request != NULL) { + if (hba->latency_hist_enabled && + (cmd->request->cmd_type == REQ_TYPE_FS)) { + cmd->request->lat_hist_io_start = ktime_get(); + cmd->request->lat_hist_enabled = 1; + } else + cmd->request->lat_hist_enabled = 0; + } + WARN_ON(hba->clk_gating.state != CLKS_ON); lrbp = &hba->lrb[tag]; @@ -3533,6 +3545,7 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, struct scsi_cmnd *cmd; int result; int index; + struct request *req; for_each_set_bit(index, &completed_reqs, hba->nutrs) { lrbp = &hba->lrb[index]; @@ -3544,6 +3557,22 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, /* Mark completed command as NULL in LRB */ lrbp->cmd = NULL; clear_bit_unlock(index, &hba->lrb_in_use); + req = cmd->request; + if (req) { + /* Update IO svc time latency histogram */ + if (req->lat_hist_enabled) { + ktime_t completion; + u_int64_t delta_us; + + completion = ktime_get(); + delta_us = ktime_us_delta(completion, + req->lat_hist_io_start); + /* rq_data_dir() => true if WRITE */ + blk_update_latency_hist(&hba->io_lat_s, + (rq_data_dir(req) == READ), + delta_us); + } + } /* Do not touch lrbp after scsi done */ cmd->scsi_done(cmd); __ufshcd_release(hba); @@ -6186,6 +6215,54 @@ int ufshcd_shutdown(struct ufs_hba *hba) } EXPORT_SYMBOL(ufshcd_shutdown); +/* + * Values permitted 0, 1, 2. + * 0 -> Disable IO latency histograms (default) + * 1 -> Enable IO latency histograms + * 2 -> Zero out IO latency histograms + */ +static ssize_t +latency_hist_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + long value; + + if (kstrtol(buf, 0, &value)) + return -EINVAL; + if (value == BLK_IO_LAT_HIST_ZERO) + blk_zero_latency_hist(&hba->io_lat_s); + else if (value == BLK_IO_LAT_HIST_ENABLE || + value == BLK_IO_LAT_HIST_DISABLE) + hba->latency_hist_enabled = value; + return count; +} + +ssize_t +latency_hist_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + return blk_latency_hist_show(&hba->io_lat_s, buf); +} + +static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR, + latency_hist_show, latency_hist_store); + +static void +ufshcd_init_latency_hist(struct ufs_hba *hba) +{ + if (device_create_file(hba->dev, &dev_attr_latency_hist)) + dev_err(hba->dev, "Failed to create latency_hist sysfs entry\n"); +} + +static void +ufshcd_exit_latency_hist(struct ufs_hba *hba) +{ + device_create_file(hba->dev, &dev_attr_latency_hist); +} + /** * ufshcd_remove - de-allocate SCSI host and host memory space * data structure memory @@ -6201,6 +6278,7 @@ void ufshcd_remove(struct ufs_hba *hba) scsi_host_put(hba->host); ufshcd_exit_clk_gating(hba); + ufshcd_exit_latency_hist(hba); if (ufshcd_is_clkscaling_enabled(hba)) devfreq_remove_device(hba->devfreq); ufshcd_hba_exit(hba); @@ -6514,6 +6592,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Hold auto suspend until async scan completes */ pm_runtime_get_sync(dev); + ufshcd_init_latency_hist(hba); + /* * The device-initialize-sequence hasn't been invoked yet. * Set the device to power-off state @@ -6528,6 +6608,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) scsi_remove_host(hba->host); exit_gating: ufshcd_exit_clk_gating(hba); + ufshcd_exit_latency_hist(hba); out_disable: hba->is_irq_enabled = false; scsi_host_put(host); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 430bef111293..06c4027971e7 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -547,6 +547,8 @@ struct ufs_hba { enum bkops_status urgent_bkops_lvl; bool is_urgent_bkops_lvl_checked; + int latency_hist_enabled; + struct io_latency_state io_lat_s; }; /* Returns true if clocks can be gated. Otherwise false */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c47c358ba052..53d4ea5d496b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,6 +196,9 @@ struct request { /* for bidi */ struct request *next_rq; + + ktime_t lat_hist_io_start; + int lat_hist_enabled; }; #define REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS) @@ -1700,6 +1703,79 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); extern int bdev_dax_supported(struct super_block *, int); extern bool bdev_dax_capable(struct block_device *); + +/* + * X-axis for IO latency histogram support. + */ +static const u_int64_t latency_x_axis_us[] = { + 100, + 200, + 300, + 400, + 500, + 600, + 700, + 800, + 900, + 1000, + 1200, + 1400, + 1600, + 1800, + 2000, + 2500, + 3000, + 4000, + 5000, + 6000, + 7000, + 9000, + 10000 +}; + +#define BLK_IO_LAT_HIST_DISABLE 0 +#define BLK_IO_LAT_HIST_ENABLE 1 +#define BLK_IO_LAT_HIST_ZERO 2 + +struct io_latency_state { + u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1]; + u_int64_t latency_reads_elems; + u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1]; + u_int64_t latency_writes_elems; +}; + +static inline void +blk_update_latency_hist(struct io_latency_state *s, + int read, + u_int64_t delta_us) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) { + if (delta_us < (u_int64_t)latency_x_axis_us[i]) { + if (read) + s->latency_y_axis_read[i]++; + else + s->latency_y_axis_write[i]++; + break; + } + } + if (i == ARRAY_SIZE(latency_x_axis_us)) { + /* Overflowed the histogram */ + if (read) + s->latency_y_axis_read[i]++; + else + s->latency_y_axis_write[i]++; + } + if (read) + s->latency_reads_elems++; + else + s->latency_writes_elems++; +} + +void blk_zero_latency_hist(struct io_latency_state *s); +ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf); + #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 2b953eb8ceae..46a4b798c7cf 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -142,6 +142,10 @@ struct mmc_request { /* Allow other commands during this ongoing data transfer or busy wait */ bool cap_cmd_during_tfr; + ktime_t io_start; +#ifdef CONFIG_BLOCK + int lat_hist_enabled; +#endif }; struct mmc_card; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6ddd1402baab..fac3b5c27f4f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -406,6 +407,11 @@ struct mmc_host { } embedded_sdio_data; #endif +#ifdef CONFIG_BLOCK + int latency_hist_enabled; + struct io_latency_state io_lat_s; +#endif + unsigned long private[0] ____cacheline_aligned; }; -- GitLab From 25cc70f312671731c1eb8d04c9c54c35c348f524 Mon Sep 17 00:00:00 2001 From: Mohan Srinivasan Date: Wed, 14 Dec 2016 16:39:51 -0800 Subject: [PATCH 1084/1442] ANDROID: fs: FS tracepoints to track IO. Adds tracepoints in ext4/f2fs/mpage to track readpages/buffered write()s. This allows us to track files that are being read/written to PIDs. (Merged from android4.4-common). Signed-off-by: Mohan Srinivasan --- fs/ext4/inline.c | 6 ++ fs/ext4/inode.c | 28 ++++++++ fs/ext4/readpage.c | 41 +++++++++-- fs/f2fs/data.c | 21 ++++++ fs/f2fs/inline.c | 11 +++ fs/mpage.c | 30 ++++++++ include/trace/events/android_fs.h | 31 +++++++++ include/trace/events/android_fs_template.h | 79 ++++++++++++++++++++++ 8 files changed, 243 insertions(+), 4 deletions(-) create mode 100644 include/trace/events/android_fs.h create mode 100644 include/trace/events/android_fs_template.h diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f74d5ee2cdec..bdef750eedd6 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -18,6 +18,7 @@ #include "ext4.h" #include "xattr.h" #include "truncate.h" +#include #define EXT4_XATTR_SYSTEM_DATA "data" #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) @@ -500,6 +501,9 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) return -EAGAIN; } + trace_android_fs_dataread_start(inode, page_offset(page), PAGE_SIZE, + current->pid, current->comm); + /* * Current inline data can only exist in the 1st page, * So for all the other pages, just set them uptodate. @@ -511,6 +515,8 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) SetPageUptodate(page); } + trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE); + up_read(&EXT4_I(inode)->xattr_sem); unlock_page(page); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c064727ed62..852b179ec965 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -44,6 +44,7 @@ #include "truncate.h" #include +#include #define MPAGE_DA_EXTENT_TAIL 0x01 @@ -1183,6 +1184,8 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, current->comm); trace_ext4_write_begin(inode, pos, len, flags); /* * Reserve one block more for addition to orphan list in case @@ -1320,6 +1323,7 @@ static int ext4_write_end(struct file *file, int ret = 0, ret2; int i_size_changed = 0; + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_write_end(inode, pos, len, copied); if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, @@ -1419,6 +1423,7 @@ static int ext4_journalled_write_end(struct file *file, unsigned from, to; int size_changed = 0; + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); to = from + len; @@ -2897,6 +2902,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, len, flags, pagep, fsdata); } *fsdata = (void *)0; + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, current->comm); trace_ext4_da_write_begin(inode, pos, len, flags); if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { @@ -3015,6 +3022,7 @@ static int ext4_da_write_end(struct file *file, return ext4_write_end(file, mapping, pos, len, copied, page, fsdata); + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_da_write_end(inode, pos, len, copied); start = pos & (PAGE_SIZE - 1); end = start + copied - 1; @@ -3571,6 +3579,7 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; ssize_t ret; + int rw = iov_iter_rw(iter); #ifdef CONFIG_EXT4_FS_ENCRYPTION if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) @@ -3587,12 +3596,31 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (ext4_has_inline_data(inode)) return 0; + if (trace_android_fs_dataread_start_enabled() && + (rw == READ)) + trace_android_fs_dataread_start(inode, offset, count, + current->pid, + current->comm); + if (trace_android_fs_datawrite_start_enabled() && + (rw == WRITE)) + trace_android_fs_datawrite_start(inode, offset, count, + current->pid, + current->comm); + trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (iov_iter_rw(iter) == READ) ret = ext4_direct_IO_read(iocb, iter); else ret = ext4_direct_IO_write(iocb, iter); trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); + + if (trace_android_fs_dataread_start_enabled() && + (rw == READ)) + trace_android_fs_dataread_end(inode, offset, count); + if (trace_android_fs_datawrite_start_enabled() && + (rw == WRITE)) + trace_android_fs_datawrite_end(inode, offset, count); + return ret; } diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index a81b829d56de..77cf54c981bb 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -45,6 +45,7 @@ #include #include "ext4.h" +#include static inline bool ext4_bio_encrypted(struct bio *bio) { @@ -55,6 +56,17 @@ static inline bool ext4_bio_encrypted(struct bio *bio) #endif } +static void +ext4_trace_read_completion(struct bio *bio) +{ + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); +} + /* * I/O completion handler for multipage BIOs. * @@ -72,6 +84,9 @@ static void mpage_end_io(struct bio *bio) struct bio_vec *bv; int i; + if (trace_android_fs_dataread_start_enabled()) + ext4_trace_read_completion(bio); + if (ext4_bio_encrypted(bio)) { if (bio->bi_error) { fscrypt_release_ctx(bio->bi_private); @@ -95,6 +110,24 @@ static void mpage_end_io(struct bio *bio) bio_put(bio); } +static void +ext4_submit_bio_read(struct bio *bio) +{ + if (trace_android_fs_dataread_start_enabled()) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) { + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + current->comm); + } + } + submit_bio(bio); +} + int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages) @@ -235,7 +268,7 @@ int ext4_mpage_readpages(struct address_space *mapping, */ if (bio && (last_block_in_bio != blocks[0] - 1)) { submit_and_realloc: - submit_bio(bio); + ext4_submit_bio_read(bio); bio = NULL; } if (bio == NULL) { @@ -268,14 +301,14 @@ int ext4_mpage_readpages(struct address_space *mapping, if (((map.m_flags & EXT4_MAP_BOUNDARY) && (relative_block == map.m_len)) || (first_hole != blocks_per_page)) { - submit_bio(bio); + ext4_submit_bio_read(bio); bio = NULL; } else last_block_in_bio = blocks[blocks_per_page - 1]; goto next_page; confused: if (bio) { - submit_bio(bio); + ext4_submit_bio_read(bio); bio = NULL; } if (!PageUptodate(page)) @@ -288,6 +321,6 @@ int ext4_mpage_readpages(struct address_space *mapping, } BUG_ON(pages && !list_empty(pages)); if (bio) - submit_bio(bio); + ext4_submit_bio_read(bio); return 0; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9ae194fd2fdb..06a0d9cc6345 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -28,6 +28,7 @@ #include "segment.h" #include "trace.h" #include +#include static void f2fs_read_end_io(struct bio *bio) { @@ -1606,6 +1607,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, block_t blkaddr = NULL_ADDR; int err = 0; + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, current->comm); trace_f2fs_write_begin(inode, pos, len, flags); /* @@ -1697,6 +1700,7 @@ static int f2fs_write_end(struct file *file, { struct inode *inode = page->mapping->host; + trace_android_fs_datawrite_end(inode, pos, len); trace_f2fs_write_end(inode, pos, len, copied); /* @@ -1758,6 +1762,16 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) trace_f2fs_direct_IO_enter(inode, offset, count, rw); + if (trace_android_fs_dataread_start_enabled() && + (rw == READ)) + trace_android_fs_dataread_start(inode, offset, + count, current->pid, + current->comm); + if (trace_android_fs_datawrite_start_enabled() && + (rw == WRITE)) + trace_android_fs_datawrite_start(inode, offset, count, + current->pid, current->comm); + down_read(&F2FS_I(inode)->dio_rwsem[rw]); err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); up_read(&F2FS_I(inode)->dio_rwsem[rw]); @@ -1769,6 +1783,13 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) f2fs_write_failed(mapping, offset + count); } + if (trace_android_fs_dataread_start_enabled() && + (rw == READ)) + trace_android_fs_dataread_end(inode, offset, count); + if (trace_android_fs_datawrite_start_enabled() && + (rw == WRITE)) + trace_android_fs_datawrite_end(inode, offset, count); + trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); return err; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5f1a67f756af..d534f44774ea 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -13,6 +13,7 @@ #include "f2fs.h" #include "node.h" +#include bool f2fs_may_inline_data(struct inode *inode) { @@ -82,14 +83,22 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) { struct page *ipage; + trace_android_fs_dataread_start(inode, page_offset(page), + PAGE_SIZE, current->pid, + current->comm); + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); unlock_page(page); return PTR_ERR(ipage); } if (!f2fs_has_inline_data(inode)) { f2fs_put_page(ipage, 1); + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); return -EAGAIN; } @@ -101,6 +110,8 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); f2fs_put_page(ipage, 1); + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); unlock_page(page); return 0; } diff --git a/fs/mpage.c b/fs/mpage.c index d2413af0823a..2bb117dc508a 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -31,6 +31,14 @@ #include #include "internal.h" +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start); +EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end); +EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start); +EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end); + /* * I/O completion handler for multipage BIOs. * @@ -48,6 +56,16 @@ static void mpage_end_io(struct bio *bio) struct bio_vec *bv; int i; + if (trace_android_fs_dataread_end_enabled() && + (bio_data_dir(bio) == READ)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); + } + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; page_endio(page, op_is_write(bio_op(bio)), bio->bi_error); @@ -58,6 +76,18 @@ static void mpage_end_io(struct bio *bio) static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio) { + if (trace_android_fs_dataread_start_enabled() && (op == REQ_OP_READ)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) { + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + current->comm); + } + } bio->bi_end_io = mpage_end_io; bio_set_op_attrs(bio, op, op_flags); guard_bio_eod(op, bio); diff --git a/include/trace/events/android_fs.h b/include/trace/events/android_fs.h new file mode 100644 index 000000000000..531da433a7bc --- /dev/null +++ b/include/trace/events/android_fs.h @@ -0,0 +1,31 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM android_fs + +#if !defined(_TRACE_ANDROID_FS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ANDROID_FS_H + +#include +#include + +DEFINE_EVENT(android_fs_data_start_template, android_fs_dataread_start, + TP_PROTO(struct inode *inode, loff_t offset, int bytes, + pid_t pid, char *command), + TP_ARGS(inode, offset, bytes, pid, command)); + +DEFINE_EVENT(android_fs_data_end_template, android_fs_dataread_end, + TP_PROTO(struct inode *inode, loff_t offset, int bytes), + TP_ARGS(inode, offset, bytes)); + +DEFINE_EVENT(android_fs_data_start_template, android_fs_datawrite_start, + TP_PROTO(struct inode *inode, loff_t offset, int bytes, + pid_t pid, char *command), + TP_ARGS(inode, offset, bytes, pid, command)); + +DEFINE_EVENT(android_fs_data_end_template, android_fs_datawrite_end, + TP_PROTO(struct inode *inode, loff_t offset, int bytes), + TP_ARGS(inode, offset, bytes)); + +#endif /* _TRACE_ANDROID_FS_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/android_fs_template.h b/include/trace/events/android_fs_template.h new file mode 100644 index 000000000000..618988b047c1 --- /dev/null +++ b/include/trace/events/android_fs_template.h @@ -0,0 +1,79 @@ +#if !defined(_TRACE_ANDROID_FS_TEMPLATE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ANDROID_FS_TEMPLATE_H + +#include + +DECLARE_EVENT_CLASS(android_fs_data_start_template, + TP_PROTO(struct inode *inode, loff_t offset, int bytes, + pid_t pid, char *command), + TP_ARGS(inode, offset, bytes, pid, command), + TP_STRUCT__entry( + __array(char, path, MAX_FILTER_STR_VAL); + __field(char *, pathname); + __field(loff_t, offset); + __field(int, bytes); + __field(loff_t, i_size); + __string(cmdline, command); + __field(pid_t, pid); + __field(ino_t, ino); + ), + TP_fast_assign( + { + struct dentry *d; + + /* + * Grab a reference to the inode here because + * d_obtain_alias() will either drop the inode + * reference if it locates an existing dentry + * or transfer the reference to the new dentry + * created. In our case, the file is still open, + * so the dentry is guaranteed to exist (connected), + * so d_obtain_alias() drops the reference we + * grabbed here. + */ + ihold(inode); + d = d_obtain_alias(inode); + if (!IS_ERR(d)) { + __entry->pathname = dentry_path(d, + __entry->path, + MAX_FILTER_STR_VAL); + dput(d); + } else + __entry->pathname = ERR_PTR(-EINVAL); + __entry->offset = offset; + __entry->bytes = bytes; + __entry->i_size = i_size_read(inode); + __assign_str(cmdline, command); + __entry->pid = pid; + __entry->ino = inode->i_ino; + } + ), + TP_printk("entry_name %s, offset %llu, bytes %d, cmdline %s," + " pid %d, i_size %llu, ino %lu", + (IS_ERR(__entry->pathname) ? "ERROR" : __entry->pathname), + __entry->offset, __entry->bytes, __get_str(cmdline), + __entry->pid, __entry->i_size, + (unsigned long) __entry->ino) +); + +DECLARE_EVENT_CLASS(android_fs_data_end_template, + TP_PROTO(struct inode *inode, loff_t offset, int bytes), + TP_ARGS(inode, offset, bytes), + TP_STRUCT__entry( + __field(ino_t, ino); + __field(loff_t, offset); + __field(int, bytes); + ), + TP_fast_assign( + { + __entry->ino = inode->i_ino; + __entry->offset = offset; + __entry->bytes = bytes; + } + ), + TP_printk("ino %lu, offset %llu, bytes %d", + (unsigned long) __entry->ino, + __entry->offset, __entry->bytes) +); + +#endif /* _TRACE_ANDROID_FS_TEMPLATE_H */ -- GitLab From 81a159106e15d2a0f746368387ef486c9d2fed07 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:41 +0900 Subject: [PATCH 1085/1442] UPSTREAM: net: core: Add a UID field to struct sock. Protocol sockets (struct sock) don't have UIDs, but most of the time, they map 1:1 to userspace sockets (struct socket) which do. Various operations such as the iptables xt_owner match need access to the "UID of a socket", and do so by following the backpointer to the struct socket. This involves taking sk_callback_lock and doesn't work when there is no socket because userspace has already called close(). Simplify this by adding a sk_uid field to struct sock whose value matches the UID of the corresponding struct socket. The semantics are as follows: 1. Whenever sk_socket is non-null: sk_uid is the same as the UID in sk_socket, i.e., matches the return value of sock_i_uid. Specifically, the UID is set when userspace calls socket(), fchown(), or accept(). 2. When sk_socket is NULL, sk_uid is defined as follows: - For a socket that no longer has a sk_socket because userspace has called close(): the previous UID. - For a cloned socket (e.g., an incoming connection that is established but on which userspace has not yet called accept): the UID of the socket it was cloned from. - For a socket that has never had an sk_socket: UID 0 inside the user namespace corresponding to the network namespace the socket belongs to. Kernel sockets created by sock_create_kern are a special case of #1 and sk_uid is the user that created them. For kernel sockets created at network namespace creation time, such as the per-processor ICMP and TCP sockets, this is the user that created the network namespace. Change-Id: Id890c6ea724b6929cc543a474ab37ec2d9e3f815 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/sock.h | 7 +++++++ net/core/sock.c | 5 ++++- net/socket.c | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 92b269709b9a..97f8ed2202bf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -419,6 +419,7 @@ struct sock { u32 sk_max_ack_backlog; __u32 sk_priority; __u32 sk_mark; + kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -1651,6 +1652,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1658,6 +1660,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline u32 net_tx_rndhash(void) { u32 v = prandom_u32(); diff --git a/net/core/sock.c b/net/core/sock.c index 00a074dbfe9b..9ce8708be96c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2436,8 +2436,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; sk->sk_wq = sock->wq; sock->sk = sk; - } else + sk->sk_uid = SOCK_INODE(sock)->i_uid; + } else { sk->sk_wq = NULL; + sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); + } rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, diff --git a/net/socket.c b/net/socket.c index 73dc69f9681e..349741687a48 100644 --- a/net/socket.c +++ b/net/socket.c @@ -533,8 +533,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } +int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int err = simple_setattr(dentry, iattr); + + if (!err) { + struct socket *sock = SOCKET_I(d_inode(dentry)); + + sock->sk->sk_uid = iattr->ia_uid; + } + + return err; +} + static const struct inode_operations sockfs_inode_ops = { .listxattr = sockfs_listxattr, + .setattr = sockfs_setattr, }; /** -- GitLab From 2ec93fec4013af92e8975bed174d48f465caed3a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:42 +0900 Subject: [PATCH 1086/1442] UPSTREAM: net: core: add UID to flows, rules, and routes - Define a new FIB rule attributes, FRA_UID_RANGE, to describe a range of UIDs. - Define a RTA_UID attribute for per-UID route lookups and dumps. - Support passing these attributes to and from userspace via rtnetlink. The value INVALID_UID indicates no UID was specified. - Add a UID field to the flow structures. Change-Id: I4d9d03e357ed5f35a65751b6d8ad919f0336dbfb Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/fib_rules.h | 9 ++++- include/net/flow.h | 5 +++ include/uapi/linux/fib_rules.h | 6 +++ include/uapi/linux/rtnetlink.h | 1 + net/core/fib_rules.c | 74 +++++++++++++++++++++++++++++++++- net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 11 +++++ net/ipv6/route.c | 7 ++++ 8 files changed, 111 insertions(+), 3 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 456e4a6006ab..8dbfdf728cd8 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,11 @@ #include #include +struct fib_kuid_range { + kuid_t start; + kuid_t end; +}; + struct fib_rule { struct list_head list; int iifindex; @@ -30,6 +35,7 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + struct fib_kuid_range uid_range; struct rcu_head rcu; }; @@ -92,7 +98,8 @@ struct fib_rules_ops { [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 } + [FRA_L3MDEV] = { .type = NLA_U8 }, \ + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 035aa7716967..51373f3a5e31 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * ifindex generation is per-net namespace, and loopback is @@ -37,6 +38,7 @@ struct flowi_common { #define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; + kuid_t flowic_uid; }; union flowi_uli { @@ -74,6 +76,7 @@ struct flowi4 { #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -131,6 +134,7 @@ struct flowi6 { #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; /* Note: flowi6_tos is encoded in flowlabel, too. */ @@ -176,6 +180,7 @@ struct flowi { #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 14404b3ebb89..bbf02a63a011 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -29,6 +29,11 @@ struct fib_rule_hdr { __u32 flags; }; +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -51,6 +56,7 @@ enum { FRA_OIFNAME, FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ __FRA_MAX }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5a78be518101..e14377f2ec27 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -318,6 +318,7 @@ enum rtattr_type_t { RTA_ENCAP, RTA_EXPIRES, RTA_PAD, + RTA_UID, __RTA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index be4629c344a6..5de436a73be2 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,6 +18,11 @@ #include #include +static const struct fib_kuid_range fib_kuid_range_unset = { + KUIDT_INIT(0), + KUIDT_INIT(~0), +}; + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->table = table; r->flags = flags; r->fr_net = ops->fro_net; + r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int uid_range_set(struct fib_kuid_range *range) +{ + return uid_valid(range->start) && uid_valid(range->end); +} + +static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) +{ + struct fib_rule_uid_range *in; + struct fib_kuid_range out; + + in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); + + out.start = make_kuid(current_user_ns(), in->start); + out.end = make_kuid(current_user_ns(), in->end); + + return out; +} + +static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) +{ + struct fib_rule_uid_range out = { + from_kuid_munged(current_user_ns(), range->start), + from_kuid_munged(current_user_ns(), range->end) + }; + + return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -193,6 +227,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; + if (uid_lt(fl->flowi_uid, rule->uid_range.start) || + uid_gt(fl->flowi_uid, rule->uid_range.end)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -429,6 +467,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (rule->l3mdev && rule->table) goto errout_free; + if (tb[FRA_UID_RANGE]) { + if (current_user_ns() != net->user_ns) { + err = -EPERM; + goto errout_free; + } + + rule->uid_range = nla_get_kuid_range(tb); + + if (!uid_range_set(&rule->uid_range) || + !uid_lte(rule->uid_range.start, rule->uid_range.end)) + goto errout_free; + } else { + rule->uid_range = fib_kuid_range_unset; + } + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -497,6 +550,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; struct nlattr *tb[FRA_MAX+1]; + struct fib_kuid_range range; int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) @@ -516,6 +570,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; + if (tb[FRA_UID_RANGE]) { + range = nla_get_kuid_range(tb); + if (!uid_range_set(&range)) + goto errout; + } else { + range = fib_kuid_range_unset; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -552,6 +614,11 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) continue; + if (uid_range_set(&range) && + (!uid_eq(rule->uid_range.start, range.start) || + !uid_eq(rule->uid_range.end, range.end))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -619,7 +686,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ - + nla_total_size_64bit(8); /* FRA_TUN_ID */ + + nla_total_size_64bit(8) /* FRA_TUN_ID */ + + nla_total_size(sizeof(struct fib_kuid_range)); if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -679,7 +747,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->tun_id && nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || (rule->l3mdev && - nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev))) + nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || + (uid_range_set(&rule->uid_range) && + nla_put_uid_range(skb, &rule->uid_range))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 161fc0f0d752..121384bbb40b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -620,6 +620,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_FLOW] = { .type = NLA_U32 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2a57566e6e91..c0d82ffd0665 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2495,6 +2495,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2547,6 +2552,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int mark; struct sk_buff *skb; u32 table_id = RT_TABLE_MAIN; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2574,6 +2580,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2581,6 +2591,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (iif) { struct net_device *dev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0b0cdb16bcd1..f629c8531297 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2774,6 +2774,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3348,6 +3349,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; -- GitLab From 5044292c361587d89bcbec50da260f5ad18c8bd1 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:43 +0900 Subject: [PATCH 1087/1442] UPSTREAM: net: inet: Support UID-based routing in IP protocols. - Use the UID in routing lookups made by protocol connect() and sendmsg() functions. - Make sure that routing lookups triggered by incoming packets (e.g., Path MTU discovery) take the UID of the socket into account. - For packets not associated with a userspace socket, (e.g., ping replies) use UID 0 inside the user namespace corresponding to the network namespace the socket belongs to. This allows all namespaces to apply routing and iptables rules to kernel-originated traffic in that namespaces by matching UID 0. This is better than using the UID of the kernel socket that is sending the traffic, because the UID of kernel sockets created at namespace creation time (e.g., the per-processor ICMP and TCP sockets) is the UID of the user that created the socket, which might not be mapped in the namespace. Change-Id: Ie35761630d9a77746399d6808ff0efb143efabb8 Tested: compiles allnoconfig, allyesconfig, allmodconfig Tested: https://android-review.googlesource.com/253302 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/flow.h | 4 +++- include/net/ip.h | 1 + include/net/ip6_route.h | 5 +++-- include/net/route.h | 5 +++-- net/ipv4/icmp.c | 2 ++ net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_output.c | 3 ++- net/ipv4/ping.c | 3 ++- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 26 +++++++++++++++----------- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_ipv4.c | 9 ++++++--- net/ipv4/udp.c | 3 ++- net/ipv6/af_inet6.c | 1 + net/ipv6/ah6.c | 5 +++-- net/ipv6/datagram.c | 1 + net/ipv6/esp6.c | 5 +++-- net/ipv6/icmp.c | 7 +++++-- net/ipv6/inet6_connection_sock.c | 2 ++ net/ipv6/ip6_gre.c | 4 ++++ net/ipv6/ip6_tunnel.c | 4 ++++ net/ipv6/ip6_vti.c | 5 +++-- net/ipv6/ipcomp6.c | 5 +++-- net/ipv6/netfilter.c | 1 + net/ipv6/ping.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/route.c | 13 +++++++++---- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 2 ++ net/ipv6/udp.c | 1 + net/l2tp/l2tp_ip6.c | 1 + 31 files changed, 89 insertions(+), 40 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index 51373f3a5e31..6bbbca8af8e3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -96,7 +96,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -107,6 +108,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; diff --git a/include/net/ip.h b/include/net/ip.h index d3a107850a41..b043c7d1914e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -178,6 +178,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f83e78d071a3..9dc2c182a263 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,9 +140,10 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, - u32 mark); + u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); diff --git a/include/net/route.h b/include/net/route.h index 0429d47cad25..c0874c87c173 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -153,7 +153,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -269,7 +269,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 48734ee6293f..691146abde2d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_mark = mark; + fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); @@ -473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net, param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61a9deec2993..d5d3ead0a6c3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -415,7 +415,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -452,7 +452,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 877bdb02e887..d24fa2075c63 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1594,7 +1594,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 96b8e2b95731..5b2635e69a92 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -793,7 +793,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ecbe5a7c2d6d..7525f5eab622 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -604,7 +604,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, sk->sk_uid); if (!inet->hdrincl) { rfv.msg = msg; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c0d82ffd0665..36f9f8b40e38 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(const struct net *net, struct flowi4 *fl4, + const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -523,7 +524,8 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, @@ -535,7 +537,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) @@ -552,7 +554,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } @@ -802,7 +804,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1020,7 +1022,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, if (!mark) mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1036,7 +1038,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); @@ -1055,6 +1057,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; + struct net *net = sock_net(sk); bh_lock_sock(sk); @@ -1068,7 +1071,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; } - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { @@ -1108,7 +1111,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1123,9 +1126,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; + struct net *net = sock_net(sk); - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - rt = __ip_route_output_key(sock_net(sk), &fl4); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e3c4043c27de..0dc6286272aa 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -372,7 +372,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest); + ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2259114c7242..eb5a0e1fb187 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -691,6 +691,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -711,7 +712,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, @@ -726,6 +727,7 @@ static void tcp_v4_send_ack(struct net *net, #endif ]; } rep; + struct net *net = sock_net(sk); struct ip_reply_arg arg; memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -775,6 +777,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -790,7 +793,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sock_net(sk), skb, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, @@ -818,7 +821,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5bab6c3f7a2f..5093bb842056 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1019,7 +1019,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 56297dc0534b..02816456a53e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -694,6 +694,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0630a4d5daaa..189eb10b742d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ccf40550c475..ebde2abc7a7c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = np->flow_label; + fl6->flowi6_uid = sk->sk_uid; if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 111ba55fd512..cbcdd5db31f4 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2772004ba5a1..17fa28f7a0ff 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) @@ -486,6 +487,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -660,6 +662,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 532c3ef282c5..1c86c478f578 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -88,6 +88,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); + fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -136,6 +137,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d7d6d3ae0b3b..710bc79f9113 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -548,6 +548,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) return -1; @@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d76674efe523..eb3d82d4b589 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1248,6 +1248,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1326,6 +1328,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d58480a9215e..3bce120cf5a4 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1b9316e1386a..54d165b9845a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d11c46833d61..39970e212ad5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 66e2d9dfc43a..e1f8b34d7a2e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 054a1d84fc5e..4947ae2db7cb 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -774,6 +774,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f629c8531297..89ab3dc9dbe2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1408,7 +1408,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1420,6 +1420,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1433,7 +1434,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) struct dst_entry *dst; ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -1525,7 +1526,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net, flags, __ip6_route_redirect); } -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1538,6 +1540,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1559,6 +1562,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.flowi6_mark = mark; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; + fl6.flowi6_uid = sock_net_uid(net, NULL); dst = ip6_route_redirect(net, &fl6, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1567,7 +1571,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, + sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 59c483937aec..97830a6a9cbb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -227,6 +227,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b9f1fee9a886..28ec0a2e7b72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -233,6 +233,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -828,6 +829,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e4a8000d59ad..f6fbd257cf7e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1156,6 +1156,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index aa821cb639e5..f092ac441fdd 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -525,6 +525,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; -- GitLab From 2b9e162b21a7931a5788456edb65199ebd8b0e21 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 30 Nov 2016 02:56:47 +0900 Subject: [PATCH 1088/1442] UPSTREAM: net: ipv4: Don't crash if passing a null sk to ip_rt_update_pmtu. Commit e2d118a1cb5e ("net: inet: Support UID-based routing in IP protocols.") made __build_flow_key call sock_net(sk) to determine the network namespace of the passed-in socket. This crashes if sk is NULL. Fix this by getting the network namespace from the skb instead. Change-Id: Ife860a7af8471bf40cc987f7803059d7bb9cc7e3 Fixes: e2d118a1cb5e ("net: inet: Support UID-based routing in IP protocols.") Reported-by: Erez Shitrit Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 36f9f8b40e38..647fa2a68666 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -531,13 +531,14 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4, static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, const struct sock *sk) { + const struct net *net = dev_net(skb->dev); const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(sock_net(sk), fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) -- GitLab From d997d9d8a54fe73ee26c96275b62329e5421ed3e Mon Sep 17 00:00:00 2001 From: mukesh agrawal Date: Tue, 12 Jul 2016 11:28:05 -0700 Subject: [PATCH 1089/1442] ANDROID: trace: net: use %pK for kernel pointers We want to use network trace events in production builds, to help diagnose Wifi problems. However, we don't want to expose raw kernel pointers in such builds. Change the format specifier for the skbaddr field, so that, if kptr_restrict is enabled, the pointers will be reported as 0. Bug: 30090733 Change-Id: Ic4bd583d37af6637343601feca875ee24479ddff Signed-off-by: mukesh agrawal --- include/trace/events/net.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 49cc7c3de252..89d009e10938 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -57,7 +57,7 @@ TRACE_EVENT(net_dev_start_xmit, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", + TP_printk("dev=%s queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", __get_str(name), __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->len, @@ -90,7 +90,7 @@ TRACE_EVENT(net_dev_xmit, __assign_str(name, dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + TP_printk("dev=%s skbaddr=%pK len=%u rc=%d", __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); @@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u", + TP_printk("dev=%s skbaddr=%pK len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) @@ -191,7 +191,7 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, -- GitLab From 1452d39a09d5c56e982781db9767c6c9bf01b854 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 30 Dec 2016 17:42:32 -0600 Subject: [PATCH 1090/1442] UPSTREAM: net: socket: don't set sk_uid to garbage value in ->setattr() ->setattr() was recently implemented for socket files to sync the socket inode's uid to the new 'sk_uid' member of struct sock. It does this by copying over the ia_uid member of struct iattr. However, ia_uid is actually only valid when ATTR_UID is set in ia_valid, indicating that the uid is being changed, e.g. by chown. Other metadata operations such as chmod or utimes leave ia_uid uninitialized. Therefore, sk_uid could be set to a "garbage" value from the stack. Fix this by only copying the uid over when ATTR_UID is set. Change-Id: I1efd83bd955325b33be3d4addccf5bac8ec803db Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Eric Biggers Tested-by: Lorenzo Colitti Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 349741687a48..5f4ec66570e1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -537,7 +537,7 @@ int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) { int err = simple_setattr(dentry, iattr); - if (!err) { + if (!err && (iattr->ia_valid & ATTR_UID)) { struct socket *sock = SOCKET_I(d_inode(dentry)); sock->sk->sk_uid = iattr->ia_uid; -- GitLab From b4a2694b0b43102414bdc1c010bcff2483e67740 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 12 Jan 2017 12:28:08 -0800 Subject: [PATCH 1091/1442] ANDROID: fiq_debugger: Remove wakelock.h dependencies Change-Id: I16a0dd4c4c6ee6440ce8a921bc0834d904b81f37 Signed-off-by: Dmitry Shmidt --- .../staging/android/fiq_debugger/fiq_debugger.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c index b132cff14f01..675b974b2a6e 100644 --- a/drivers/staging/android/fiq_debugger/fiq_debugger.c +++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c @@ -33,7 +33,6 @@ #include #include #include -#include #ifdef CONFIG_FIQ_GLUE #include @@ -82,7 +81,7 @@ struct fiq_debugger_state { struct timer_list sleep_timer; spinlock_t sleep_timer_lock; bool uart_enabled; - struct wake_lock debugger_wake_lock; + struct wakeup_source debugger_wake_src; bool console_enable; int current_cpu; atomic_t unhandled_fiq_count; @@ -563,7 +562,7 @@ static void fiq_debugger_sleep_timer_expired(unsigned long data) state->uart_enabled = false; fiq_debugger_enable_wakeup_irq(state); } - wake_unlock(&state->debugger_wake_lock); + __pm_relax(&state->debugger_wake_src); spin_unlock_irqrestore(&state->sleep_timer_lock, flags); } @@ -575,7 +574,7 @@ static void fiq_debugger_handle_wakeup(struct fiq_debugger_state *state) if (state->wakeup_irq >= 0 && state->ignore_next_wakeup_irq) { state->ignore_next_wakeup_irq = false; } else if (!state->uart_enabled) { - wake_lock(&state->debugger_wake_lock); + __pm_stay_awake(&state->debugger_wake_src); fiq_debugger_uart_enable(state); state->uart_enabled = true; fiq_debugger_disable_wakeup_irq(state); @@ -619,7 +618,7 @@ static void fiq_debugger_handle_irq_context(struct fiq_debugger_state *state) unsigned long flags; spin_lock_irqsave(&state->sleep_timer_lock, flags); - wake_lock(&state->debugger_wake_lock); + __pm_stay_awake(&state->debugger_wake_src); mod_timer(&state->sleep_timer, jiffies + HZ * 5); spin_unlock_irqrestore(&state->sleep_timer_lock, flags); } @@ -1086,8 +1085,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) state->no_sleep = true; state->ignore_next_wakeup_irq = !state->no_sleep; - wake_lock_init(&state->debugger_wake_lock, - WAKE_LOCK_SUSPEND, "serial-debug"); + wakeup_source_init(&state->debugger_wake_src, "serial-debug"); state->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(state->clk)) @@ -1188,7 +1186,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) clk_disable(state->clk); if (state->clk) clk_put(state->clk); - wake_lock_destroy(&state->debugger_wake_lock); + wakeup_source_trash(&state->debugger_wake_src); platform_set_drvdata(pdev, NULL); kfree(state); return ret; -- GitLab From 1626ae31638046803acb97ac8417a495c8ba5eef Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 12 Jan 2017 12:34:22 -0800 Subject: [PATCH 1092/1442] ANDROID: gpio_matrix: Remove wakelock.h dependencies Change-Id: I228bcdebf28f5c67765002043d3f919718827316 Signed-off-by: Dmitry Shmidt --- drivers/input/misc/gpio_matrix.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/input/misc/gpio_matrix.c b/drivers/input/misc/gpio_matrix.c index eaa9e89d473a..08769dd88f56 100644 --- a/drivers/input/misc/gpio_matrix.c +++ b/drivers/input/misc/gpio_matrix.c @@ -19,13 +19,12 @@ #include #include #include -#include struct gpio_kp { struct gpio_event_input_devs *input_devs; struct gpio_event_matrix_info *keypad_info; struct hrtimer timer; - struct wake_lock wake_lock; + struct wakeup_source wake_src; int current_output; unsigned int use_irq:1; unsigned int key_state_changed:1; @@ -215,7 +214,7 @@ static enum hrtimer_restart gpio_keypad_timer_func(struct hrtimer *timer) } for (in = 0; in < mi->ninputs; in++) enable_irq(gpio_to_irq(mi->input_gpios[in])); - wake_unlock(&kp->wake_lock); + __pm_relax(&kp->wake_src); return HRTIMER_NORESTART; } @@ -242,7 +241,7 @@ static irqreturn_t gpio_keypad_irq_handler(int irq_in, void *dev_id) else gpio_direction_input(mi->output_gpios[i]); } - wake_lock(&kp->wake_lock); + __pm_stay_awake(&kp->wake_src); hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL); return IRQ_HANDLED; } @@ -396,7 +395,7 @@ int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, hrtimer_init(&kp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); kp->timer.function = gpio_keypad_timer_func; - wake_lock_init(&kp->wake_lock, WAKE_LOCK_SUSPEND, "gpio_kp"); + wakeup_source_init(&kp->wake_src, "gpio_kp"); err = gpio_keypad_request_irqs(kp); kp->use_irq = err == 0; @@ -406,7 +405,7 @@ int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, kp->use_irq ? "interrupt" : "polling"); if (kp->use_irq) - wake_lock(&kp->wake_lock); + __pm_stay_awake(&kp->wake_src); hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL); return 0; @@ -420,7 +419,7 @@ int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs, free_irq(gpio_to_irq(mi->input_gpios[i]), kp); hrtimer_cancel(&kp->timer); - wake_lock_destroy(&kp->wake_lock); + wakeup_source_trash(&kp->wake_src); for (i = mi->noutputs - 1; i >= 0; i--) { err_gpio_direction_input_failed: gpio_free(mi->input_gpios[i]); -- GitLab From 48baaa32cef7e07f4877081f2fa98f079352ff01 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 12 Jan 2017 12:45:46 -0800 Subject: [PATCH 1093/1442] ANDROID: usb: otg-wakelock: Remove wakelock.h dependencies Change-Id: Ibff8d6e04cc475114bc0a91512d0ee3900768b06 Signed-off-by: Dmitry Shmidt --- drivers/usb/phy/Kconfig | 2 +- drivers/usb/phy/otg-wakelock.c | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index aa5e9fc84642..6801de9cdac9 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -8,7 +8,7 @@ config USB_PHY config USB_OTG_WAKELOCK bool "Hold a wakelock when USB connected" - depends on WAKELOCK + depends on PM_WAKELOCKS select USB_OTG_UTILS help Select this to automatically hold a wakelock when USB is diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c index 479376bfa484..ecd741027f53 100644 --- a/drivers/usb/phy/otg-wakelock.c +++ b/drivers/usb/phy/otg-wakelock.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -42,7 +41,7 @@ static DEFINE_SPINLOCK(otgwl_spinlock); struct otgwl_lock { char name[40]; - struct wake_lock wakelock; + struct wakeup_source wakesrc; bool held; }; @@ -57,22 +56,21 @@ static struct otgwl_lock vbus_lock; static void otgwl_hold(struct otgwl_lock *lock) { if (!lock->held) { - wake_lock(&lock->wakelock); + __pm_stay_awake(&lock->wakesrc); lock->held = true; } } static void otgwl_temporary_hold(struct otgwl_lock *lock) { - wake_lock_timeout(&lock->wakelock, - msecs_to_jiffies(TEMPORARY_HOLD_TIME)); + __pm_wakeup_event(&lock->wakesrc, TEMPORARY_HOLD_TIME); lock->held = false; } static void otgwl_drop(struct otgwl_lock *lock) { if (lock->held) { - wake_unlock(&lock->wakelock); + __pm_relax(&lock->wakesrc); lock->held = false; } } @@ -151,8 +149,7 @@ static int __init otg_wakelock_init(void) snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s", dev_name(otgwl_xceiv->dev)); - wake_lock_init(&vbus_lock.wakelock, WAKE_LOCK_SUSPEND, - vbus_lock.name); + wakeup_source_init(&vbus_lock.wakesrc, vbus_lock.name); otgwl_nb.notifier_call = otgwl_otg_notifications; ret = usb_register_notifier(otgwl_xceiv, &otgwl_nb); @@ -162,7 +159,7 @@ static int __init otg_wakelock_init(void) " failed\n", __func__, dev_name(otgwl_xceiv->dev)); otgwl_xceiv = NULL; - wake_lock_destroy(&vbus_lock.wakelock); + wakeup_source_trash(&vbus_lock.wakesrc); return ret; } -- GitLab From c5b8dcdea84b79829ca51e1071777caa4ec04f05 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 13 Jan 2017 11:05:00 -0800 Subject: [PATCH 1094/1442] ANDROID: dm: rebase for 4.9 Export the direct_access method of dm_linear target for dm-android-verity target. Signed-off-by: Badhri Jagan Sridharan Change-Id: I46556d882305e5194352946264cbc9c06e5038e4 --- drivers/md/dm-android-verity.c | 1 + drivers/md/dm-android-verity.h | 2 ++ drivers/md/dm-linear.c | 5 +++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index bb6c1285e499..881e7099d401 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -635,6 +635,7 @@ static int add_as_linear_device(struct dm_target *ti, char *dev) android_verity_target.status = dm_linear_status, android_verity_target.prepare_ioctl = dm_linear_prepare_ioctl, android_verity_target.iterate_devices = dm_linear_iterate_devices, + android_verity_target.direct_access = dm_linear_direct_access, android_verity_target.io_hints = NULL; err = dm_linear_ctr(ti, DM_LINEAR_ARGS, linear_table_args); diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h index 0c7ff6afec69..c8d7ab642780 100644 --- a/drivers/md/dm-android-verity.h +++ b/drivers/md/dm-android-verity.h @@ -118,4 +118,6 @@ extern int dm_linear_prepare_ioctl(struct dm_target *ti, extern int dm_linear_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data); extern int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv); +extern long dm_linear_direct_access(struct dm_target *ti, sector_t sector, + void **kaddr, pfn_t *pfn, long size); #endif /* DM_ANDROID_VERITY_H */ diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index ee2f40a91132..4ad62d680547 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -147,7 +147,7 @@ int dm_linear_iterate_devices(struct dm_target *ti, } EXPORT_SYMBOL_GPL(dm_linear_iterate_devices); -static long linear_direct_access(struct dm_target *ti, sector_t sector, +long dm_linear_direct_access(struct dm_target *ti, sector_t sector, void **kaddr, pfn_t *pfn, long size) { struct linear_c *lc = ti->private; @@ -164,6 +164,7 @@ static long linear_direct_access(struct dm_target *ti, sector_t sector, return ret; } +EXPORT_SYMBOL_GPL(dm_linear_direct_access); static struct target_type linear_target = { .name = "linear", @@ -175,7 +176,7 @@ static struct target_type linear_target = { .status = dm_linear_status, .prepare_ioctl = dm_linear_prepare_ioctl, .iterate_devices = dm_linear_iterate_devices, - .direct_access = linear_direct_access, + .direct_access = dm_linear_direct_access, }; int __init dm_linear_init(void) -- GitLab From ad3c02f8b3a52704295eec932e1ee10a5b427991 Mon Sep 17 00:00:00 2001 From: James Carr Date: Fri, 29 Jul 2016 19:02:16 -0700 Subject: [PATCH 1095/1442] ANDROID: Implement memory_state_time, used by qcom,cpubw New driver memory_state_time tracks time spent in different DDR frequency and bandwidth states. Memory drivers such as qcom,cpubw can post updated state to the driver after registering a callback. Processed by a workqueue Bandwidth buckets are read in from device tree in the relevant qualcomm section, can be defined in any quantity and spacing. The data is exposed at /sys/kernel/memory_state_time, able to be read by the Android framework. Functionality is behind a config option CONFIG_MEMORY_STATE_TIME Change-Id: I4fee165571cb975fb9eacbc9aada5e6d7dd748f0 Signed-off-by: James Carr --- .../bindings/misc/memory-state-time.txt | 8 + drivers/misc/Kconfig | 6 + drivers/misc/Makefile | 1 + drivers/misc/memory_state_time.c | 454 ++++++++++++++++++ include/linux/memory-state-time.h | 42 ++ 5 files changed, 511 insertions(+) create mode 100644 Documentation/devicetree/bindings/misc/memory-state-time.txt create mode 100644 drivers/misc/memory_state_time.c create mode 100644 include/linux/memory-state-time.h diff --git a/Documentation/devicetree/bindings/misc/memory-state-time.txt b/Documentation/devicetree/bindings/misc/memory-state-time.txt new file mode 100644 index 000000000000..c99a506c030d --- /dev/null +++ b/Documentation/devicetree/bindings/misc/memory-state-time.txt @@ -0,0 +1,8 @@ +Memory bandwidth and frequency state tracking + +Required properties: +- compatible : should be: + "memory-state-time" +- freq-tbl: Should contain entries with each frequency in Hz. +- bw-buckets: Should contain upper-bound limits for each bandwidth bucket in Mbps. + Must match the framework power_profile.xml for the device. diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 8d0e3475480f..407cf290c031 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -772,6 +772,12 @@ config UID_CPUTIME help Per UID based cpu time statistics exported to /proc/uid_cputime +config MEMORY_STATE_TIME + tristate "Memory freq/bandwidth time statistics" + depends on PROFILING + help + Memory time statistics exported to /sys/kernel/memory_state_time + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 3ac6e9537891..3f89debe4582 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_PANEL) += panel.o obj-$(CONFIG_UID_CPUTIME) += uid_cputime.o +obj-$(CONFIG_MEMORY_STATE_TIME) += memory_state_time.o lkdtm-$(CONFIG_LKDTM) += lkdtm_core.o lkdtm-$(CONFIG_LKDTM) += lkdtm_bugs.o diff --git a/drivers/misc/memory_state_time.c b/drivers/misc/memory_state_time.c new file mode 100644 index 000000000000..34c797a06a31 --- /dev/null +++ b/drivers/misc/memory_state_time.c @@ -0,0 +1,454 @@ +/* drivers/misc/memory_state_time.c + * + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KERNEL_ATTR_RO(_name) \ +static struct kobj_attribute _name##_attr = __ATTR_RO(_name) + +#define KERNEL_ATTR_RW(_name) \ +static struct kobj_attribute _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +#define FREQ_HASH_BITS 4 +DECLARE_HASHTABLE(freq_hash_table, FREQ_HASH_BITS); + +static DEFINE_MUTEX(mem_lock); + +#define TAG "memory_state_time" +#define BW_NODE "/soc/memory-state-time" +#define FREQ_TBL "freq-tbl" +#define BW_TBL "bw-buckets" +#define NUM_SOURCES "num-sources" + +#define LOWEST_FREQ 2 + +static int curr_bw; +static int curr_freq; +static u32 *bw_buckets; +static u32 *freq_buckets; +static int num_freqs; +static int num_buckets; +static int registered_bw_sources; +static u64 last_update; +static bool init_success; +static struct workqueue_struct *memory_wq; +static u32 num_sources = 10; +static int *bandwidths; + +struct freq_entry { + int freq; + u64 *buckets; /* Bandwidth buckets. */ + struct hlist_node hash; +}; + +struct queue_container { + struct work_struct update_state; + int value; + u64 time_now; + int id; + struct mutex *lock; +}; + +static int find_bucket(int bw) +{ + int i; + + if (bw_buckets != NULL) { + for (i = 0; i < num_buckets; i++) { + if (bw_buckets[i] > bw) { + pr_debug("Found bucket %d for bandwidth %d\n", + i, bw); + return i; + } + } + return num_buckets - 1; + } + return 0; +} + +static u64 get_time_diff(u64 time_now) +{ + u64 ms; + + ms = time_now - last_update; + last_update = time_now; + return ms; +} + +static ssize_t show_stat_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int i, j; + int len = 0; + struct freq_entry *freq_entry; + + for (i = 0; i < num_freqs; i++) { + hash_for_each_possible(freq_hash_table, freq_entry, hash, + freq_buckets[i]) { + if (freq_entry->freq == freq_buckets[i]) { + len += scnprintf(buf + len, PAGE_SIZE - len, + "%d ", freq_buckets[i]); + if (len >= PAGE_SIZE) + break; + for (j = 0; j < num_buckets; j++) { + len += scnprintf(buf + len, + PAGE_SIZE - len, + "%llu ", + freq_entry->buckets[j]); + } + len += scnprintf(buf + len, PAGE_SIZE - len, + "\n"); + } + } + } + pr_debug("Current Time: %llu\n", ktime_get_boot_ns()); + return len; +} +KERNEL_ATTR_RO(show_stat); + +static void update_table(u64 time_now) +{ + struct freq_entry *freq_entry; + + pr_debug("Last known bw %d freq %d\n", curr_bw, curr_freq); + hash_for_each_possible(freq_hash_table, freq_entry, hash, curr_freq) { + if (curr_freq == freq_entry->freq) { + freq_entry->buckets[find_bucket(curr_bw)] + += get_time_diff(time_now); + break; + } + } +} + +static bool freq_exists(int freq) +{ + int i; + + for (i = 0; i < num_freqs; i++) { + if (freq == freq_buckets[i]) + return true; + } + return false; +} + +static int calculate_total_bw(int bw, int index) +{ + int i; + int total_bw = 0; + + pr_debug("memory_state_time New bw %d for id %d\n", bw, index); + bandwidths[index] = bw; + for (i = 0; i < registered_bw_sources; i++) + total_bw += bandwidths[i]; + return total_bw; +} + +static void freq_update_do_work(struct work_struct *work) +{ + struct queue_container *freq_state_update + = container_of(work, struct queue_container, + update_state); + if (freq_state_update) { + mutex_lock(&mem_lock); + update_table(freq_state_update->time_now); + curr_freq = freq_state_update->value; + mutex_unlock(&mem_lock); + kfree(freq_state_update); + } +} + +static void bw_update_do_work(struct work_struct *work) +{ + struct queue_container *bw_state_update + = container_of(work, struct queue_container, + update_state); + if (bw_state_update) { + mutex_lock(&mem_lock); + update_table(bw_state_update->time_now); + curr_bw = calculate_total_bw(bw_state_update->value, + bw_state_update->id); + mutex_unlock(&mem_lock); + kfree(bw_state_update); + } +} + +static void memory_state_freq_update(struct memory_state_update_block *ub, + int value) +{ + if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) { + if (freq_exists(value) && init_success) { + struct queue_container *freq_container + = kmalloc(sizeof(struct queue_container), + GFP_KERNEL); + if (!freq_container) + return; + INIT_WORK(&freq_container->update_state, + freq_update_do_work); + freq_container->time_now = ktime_get_boot_ns(); + freq_container->value = value; + pr_debug("Scheduling freq update in work queue\n"); + queue_work(memory_wq, &freq_container->update_state); + } else { + pr_debug("Freq does not exist.\n"); + } + } +} + +static void memory_state_bw_update(struct memory_state_update_block *ub, + int value) +{ + if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) { + if (init_success) { + struct queue_container *bw_container + = kmalloc(sizeof(struct queue_container), + GFP_KERNEL); + if (!bw_container) + return; + INIT_WORK(&bw_container->update_state, + bw_update_do_work); + bw_container->time_now = ktime_get_boot_ns(); + bw_container->value = value; + bw_container->id = ub->id; + pr_debug("Scheduling bandwidth update in work queue\n"); + queue_work(memory_wq, &bw_container->update_state); + } + } +} + +struct memory_state_update_block *memory_state_register_frequency_source(void) +{ + struct memory_state_update_block *block; + + if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) { + pr_debug("Allocating frequency source\n"); + block = kmalloc(sizeof(struct memory_state_update_block), + GFP_KERNEL); + if (!block) + return NULL; + block->update_call = memory_state_freq_update; + return block; + } + pr_err("Config option disabled.\n"); + return NULL; +} +EXPORT_SYMBOL_GPL(memory_state_register_frequency_source); + +struct memory_state_update_block *memory_state_register_bandwidth_source(void) +{ + struct memory_state_update_block *block; + + if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) { + pr_debug("Allocating bandwidth source %d\n", + registered_bw_sources); + block = kmalloc(sizeof(struct memory_state_update_block), + GFP_KERNEL); + if (!block) + return NULL; + block->update_call = memory_state_bw_update; + if (registered_bw_sources < num_sources) { + block->id = registered_bw_sources++; + } else { + pr_err("Unable to allocate source; max number reached\n"); + kfree(block); + return NULL; + } + return block; + } + pr_err("Config option disabled.\n"); + return NULL; +} +EXPORT_SYMBOL_GPL(memory_state_register_bandwidth_source); + +/* Buckets are designated by their maximum. + * Returns the buckets decided by the capability of the device. + */ +static int get_bw_buckets(struct device *dev) +{ + int ret, lenb; + struct device_node *node = dev->of_node; + + of_property_read_u32(node, NUM_SOURCES, &num_sources); + if (of_find_property(node, BW_TBL, &lenb)) { + bandwidths = devm_kzalloc(dev, + sizeof(*bandwidths) * num_sources, GFP_KERNEL); + if (!bandwidths) + return -ENOMEM; + lenb /= sizeof(*bw_buckets); + bw_buckets = devm_kzalloc(dev, lenb * sizeof(*bw_buckets), + GFP_KERNEL); + if (!bw_buckets) { + devm_kfree(dev, bandwidths); + return -ENOMEM; + } + ret = of_property_read_u32_array(node, BW_TBL, bw_buckets, + lenb); + if (ret < 0) { + devm_kfree(dev, bandwidths); + devm_kfree(dev, bw_buckets); + pr_err("Unable to read bandwidth table from device tree.\n"); + return ret; + } + } + curr_bw = 0; + num_buckets = lenb; + return 0; +} + +/* Adds struct freq_entry nodes to the hashtable for each compatible frequency. + * Returns the supported number of frequencies. + */ +static int freq_buckets_init(struct device *dev) +{ + struct freq_entry *freq_entry; + int i; + int ret, lenf; + struct device_node *node = dev->of_node; + + if (of_find_property(node, FREQ_TBL, &lenf)) { + lenf /= sizeof(*freq_buckets); + freq_buckets = devm_kzalloc(dev, lenf * sizeof(*freq_buckets), + GFP_KERNEL); + if (!freq_buckets) + return -ENOMEM; + pr_debug("freqs found len %d\n", lenf); + ret = of_property_read_u32_array(node, FREQ_TBL, freq_buckets, + lenf); + if (ret < 0) { + devm_kfree(dev, freq_buckets); + pr_err("Unable to read frequency table from device tree.\n"); + return ret; + } + pr_debug("ret freq %d\n", ret); + } + num_freqs = lenf; + curr_freq = freq_buckets[LOWEST_FREQ]; + + for (i = 0; i < num_freqs; i++) { + freq_entry = devm_kzalloc(dev, sizeof(struct freq_entry), + GFP_KERNEL); + if (!freq_entry) + return -ENOMEM; + freq_entry->buckets = devm_kzalloc(dev, sizeof(u64)*num_buckets, + GFP_KERNEL); + if (!freq_entry->buckets) { + devm_kfree(dev, freq_entry); + return -ENOMEM; + } + pr_debug("memory_state_time Adding freq to ht %d\n", + freq_buckets[i]); + freq_entry->freq = freq_buckets[i]; + hash_add(freq_hash_table, &freq_entry->hash, freq_buckets[i]); + } + return 0; +} + +struct kobject *memory_kobj; +EXPORT_SYMBOL_GPL(memory_kobj); + +static struct attribute *memory_attrs[] = { + &show_stat_attr.attr, + NULL +}; + +static struct attribute_group memory_attr_group = { + .attrs = memory_attrs, +}; + +static int memory_state_time_probe(struct platform_device *pdev) +{ + int error; + + error = get_bw_buckets(&pdev->dev); + if (error) + return error; + error = freq_buckets_init(&pdev->dev); + if (error) + return error; + last_update = ktime_get_boot_ns(); + init_success = true; + + pr_debug("memory_state_time initialized with num_freqs %d\n", + num_freqs); + return 0; +} + +static const struct of_device_id match_table[] = { + { .compatible = "memory-state-time" }, + {} +}; + +static struct platform_driver memory_state_time_driver = { + .probe = memory_state_time_probe, + .driver = { + .name = "memory-state-time", + .of_match_table = match_table, + .owner = THIS_MODULE, + }, +}; + +static int __init memory_state_time_init(void) +{ + int error; + + hash_init(freq_hash_table); + memory_wq = create_singlethread_workqueue("memory_wq"); + if (!memory_wq) { + pr_err("Unable to create workqueue.\n"); + return -EINVAL; + } + /* + * Create sys/kernel directory for memory_state_time. + */ + memory_kobj = kobject_create_and_add(TAG, kernel_kobj); + if (!memory_kobj) { + pr_err("Unable to allocate memory_kobj for sysfs directory.\n"); + error = -ENOMEM; + goto wq; + } + error = sysfs_create_group(memory_kobj, &memory_attr_group); + if (error) { + pr_err("Unable to create sysfs folder.\n"); + goto kobj; + } + + error = platform_driver_register(&memory_state_time_driver); + if (error) { + pr_err("Unable to register memory_state_time platform driver.\n"); + goto group; + } + return 0; + +group: sysfs_remove_group(memory_kobj, &memory_attr_group); +kobj: kobject_put(memory_kobj); +wq: destroy_workqueue(memory_wq); + return error; +} +module_init(memory_state_time_init); diff --git a/include/linux/memory-state-time.h b/include/linux/memory-state-time.h new file mode 100644 index 000000000000..d2212b027866 --- /dev/null +++ b/include/linux/memory-state-time.h @@ -0,0 +1,42 @@ +/* include/linux/memory-state-time.h + * + * Copyright (C) 2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include + +#define UPDATE_MEMORY_STATE(BLOCK, VALUE) BLOCK->update_call(BLOCK, VALUE) + +struct memory_state_update_block; + +typedef void (*memory_state_update_fn_t)(struct memory_state_update_block *ub, + int value); + +/* This struct is populated when you pass it to a memory_state_register* + * function. The update_call function is used for an update and defined in the + * typedef memory_state_update_fn_t + */ +struct memory_state_update_block { + memory_state_update_fn_t update_call; + int id; +}; + +/* Register a frequency struct memory_state_update_block to provide updates to + * memory_state_time about frequency changes using its update_call function. + */ +struct memory_state_update_block *memory_state_register_frequency_source(void); + +/* Register a bandwidth struct memory_state_update_block to provide updates to + * memory_state_time about bandwidth changes using its update_call function. + */ +struct memory_state_update_block *memory_state_register_bandwidth_source(void); -- GitLab From c250cd6ebb7fd69045ec5c860e5da520936e0516 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 9 Aug 2016 16:22:43 +0530 Subject: [PATCH 1096/1442] ANDROID: configs: merge AOSP config fragments Upstream now supports AOSP kernel config fragments: commit 27eb6622ab67 ("config: add android config fragments"). This patch merge non-upstream AOSP config fragments from android/configs/android-* of common kernel experimental/android-4.9 to kernel/configs/android-*. Added initial set of AOSP config fragments and a README.android, from AOSP Change-ID: I3a4883f3b04d2820e90ceb3c4d02390d6458d6ce ("android: configs: Initial commit of Android config fragments"), to explain the purpose of Android config fragments and how to use them to generate a device config compatible with Android. Signed-off-by: Amit Pundir --- kernel/configs/README.android | 15 +++++++++++++++ kernel/configs/android-base.config | 5 +++++ kernel/configs/android-recommended.config | 2 ++ 3 files changed, 22 insertions(+) create mode 100644 kernel/configs/README.android diff --git a/kernel/configs/README.android b/kernel/configs/README.android new file mode 100644 index 000000000000..2e2d7c001275 --- /dev/null +++ b/kernel/configs/README.android @@ -0,0 +1,15 @@ +The android-*.config files in this directory are meant to be used as a base +for an Android kernel config. All devices should have the options in +android-base.config enabled. While not mandatory, the options in +android-recommended.config enable advanced Android features. + +Assuming you already have a minimalist defconfig for your device, a possible +way to enable these options would be: + + ARCH= scripts/kconfig/merge_config.sh /_defconfig kernel/configs/android-base.config kernel/configs/android-recommended.config + +This will generate a .config that can then be used to save a new defconfig or +compile a new kernel with Android features enabled. + +Because there is no tool to consistently generate these config fragments, +lets keep them alphabetically sorted instead of random. diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index 1a8f34f63601..bd645375535f 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -76,6 +76,9 @@ CONFIG_NETFILTER_XT_MATCH_MAC=y CONFIG_NETFILTER_XT_MATCH_MARK=y CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y CONFIG_NETFILTER_XT_MATCH_QUOTA=y CONFIG_NETFILTER_XT_MATCH_SOCKET=y CONFIG_NETFILTER_XT_MATCH_STATE=y @@ -124,6 +127,8 @@ CONFIG_PACKET=y CONFIG_PM_AUTOSLEEP=y CONFIG_PM_WAKELOCKS=y CONFIG_PPP=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y CONFIG_PPP_BSDCOMP=y CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 297756be369c..137885234cac 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -75,6 +75,8 @@ CONFIG_HID_ZYDACRON=y CONFIG_INPUT_EVDEV=y CONFIG_INPUT_GPIO=y CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_KEYRESET=y CONFIG_INPUT_MISC=y CONFIG_INPUT_TABLET=y CONFIG_INPUT_UINPUT=y -- GitLab From c88356f9dba5209d5617f474ff7985af878d730a Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 13 Jul 2015 17:54:56 +0530 Subject: [PATCH 1097/1442] ANDROID: configs: base: enable configfs gadget functions Now that Android is moving towards ConfigFS based USB gadgets, lets enable USB_CONFIGFS and relevant Android gadget functions instead of obsolete USB_G_ANDROID composite driver which doesn't exist now. Enabled following ConfigFS gadget functions: F_FS for ADB F_MTP/PTP for MTP/PTP F_ACC for Android USB Accessory F_AUDIO_SRC for USB Audio Source F_MIDI for MIDI, and CONFIGFS_UEVENT for communicating USB state change notifications to userspace. Signed-off-by: Amit Pundir --- kernel/configs/android-base.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index bd645375535f..c4fc478578bb 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -149,6 +149,11 @@ CONFIG_UNIX=y CONFIG_USB_GADGET=y CONFIG_USB_CONFIGFS=y CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_MTP=y +CONFIG_USB_CONFIGFS_F_PTP=y +CONFIG_USB_CONFIGFS_F_ACC=y +CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y +CONFIG_USB_CONFIGFS_UEVENT=y CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_USB_OTG_WAKELOCK=y CONFIG_XFRM_USER=y -- GitLab From 270ef0c00a5286c957f3131a39adfb6890314c6b Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Wed, 1 Jun 2016 13:44:47 -0700 Subject: [PATCH 1098/1442] ANDROID: kernel/configs: base: restrict access to perf events Add: CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y to android-base.cfg The kernel.perf_event_paranoid sysctl is set to 3 by default. No unprivileged use of the perf_event_open syscall will be permitted unless it is changed. Bug: 29054680 Change-Id: Ie7512259150e146d8e382dc64d40e8faaa438917 Signed-off-by: Amit Pundir --- kernel/configs/android-base.config | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index c4fc478578bb..4ee7a5abf357 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -139,6 +139,7 @@ CONFIG_RT_GROUP_SCHED=y CONFIG_SECCOMP=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y CONFIG_SECURITY_SELINUX=y CONFIG_SETEND_EMULATION=y CONFIG_STAGING=y -- GitLab From 611ef60db3f3ab48d45bf37a7392b253c265e177 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 23 Jun 2016 15:35:07 +0530 Subject: [PATCH 1099/1442] ANDROID: kernel/configs: base: enable UID_CPUTIME Enabled UID_CPUTIME and dependent PROFILING config option. UID_CPUTIME (/proc/uid_cputime) interfaces provide amount of time a UID's processes spent executing in user-space and kernel-space. It is used by batterystats service. Signed-off-by: Amit Pundir --- kernel/configs/android-base.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index 4ee7a5abf357..0144f0fbc66d 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -133,6 +133,7 @@ CONFIG_PPP_BSDCOMP=y CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y +CONFIG_PROFILING=y CONFIG_QUOTA=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y @@ -146,6 +147,7 @@ CONFIG_STAGING=y CONFIG_SWP_EMULATION=y CONFIG_SYNC=y CONFIG_TUN=y +CONFIG_UID_CPUTIME=y CONFIG_UNIX=y CONFIG_USB_GADGET=y CONFIG_USB_CONFIGFS=y -- GitLab From abfff41cc69203161c9a8f4f2b46e6efe01a2a60 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Sun, 12 Jun 2016 17:37:52 -0700 Subject: [PATCH 1100/1442] ANDROID: kernel/configs: recommended: enable fstack-protector-strong If compiler has stack protector support, set CONFIG_CC_STACKPROTECTOR_STRONG. Bug: 28967314 Change-Id: I588c2d544250e9e4b5082b43c237b8f85b7313ca Signed-off-by: Jeff Vander Stoep Signed-off-by: Amit Pundir --- kernel/configs/android-recommended.config | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 137885234cac..7e4bcfb6bc6a 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -10,6 +10,7 @@ CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_COMPACTION=y CONFIG_DEBUG_RODATA=y CONFIG_DM_CRYPT=y -- GitLab From 52e45973e797cfaad38192eea9bb303315df6921 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 19 Jan 2017 10:01:28 +1100 Subject: [PATCH 1101/1442] FROMLIST: config: android-recommended: disable aio support The aio interface adds substantial attack surface for a feature that's not being exposed by Android at all. It's unlikely that anyone is using the kernel feature directly either. This feature is rarely used even on servers. The glibc POSIX aio calls really use thread pools. The lack of widespread usage also means this is relatively poorly audited/tested. The kernel's aio rarely provides performance benefits over using a thread pool and is quite incomplete in terms of system call coverage along with having edge cases where blocking can occur. Part of the performance issue is the fact that it only supports direct io, not buffered io. The existing API is considered fundamentally flawed and it's unlikely it will be expanded, but rather replaced: https://marc.info/?l=linux-aio&m=145255815216051&w=2 Since ext4 encryption means no direct io support, kernel aio isn't even going to work properly on Android devices using file-based encryption. Reviewed-at: https://android-review.googlesource.com/#/c/292158/ Link: http://lkml.kernel.org/r/1481113148-29204-1-git-send-email-amit.pundir@linaro.org Signed-off-by: Daniel Micay Signed-off-by: Amit Pundir Cc: Rob Herring Cc: John Stultz Signed-off-by: Andrew Morton --- kernel/configs/android-recommended.config | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 7e4bcfb6bc6a..96788b2ff20d 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -1,4 +1,5 @@ # KEEP ALPHABETICALLY SORTED +# CONFIG_AIO is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_LEGACY_PTYS is not set -- GitLab From 95bb7b31fb24433348e84388e996d0a7e5afb35b Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 19 Jan 2017 10:01:28 +1100 Subject: [PATCH 1102/1442] FROMLIST: config: android-base: enable hardened usercopy and kernel ASLR Enable CONFIG_HARDENED_USERCOPY and CONFIG_RANDOMIZE_BASE in Android base config fragment. Reviewed at https://android-review.googlesource.com/#/c/283659/ Reviewed at https://android-review.googlesource.com/#/c/278133/ Link: http://lkml.kernel.org/r/1481113148-29204-2-git-send-email-amit.pundir@linaro.org Signed-off-by: Amit Pundir Cc: Rob Herring Cc: John Stultz Cc: Daniel Micay Signed-off-by: Andrew Morton --- kernel/configs/android-base.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index 0144f0fbc66d..adc552e1f9b1 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -21,6 +21,7 @@ CONFIG_CP15_BARRIER_EMULATION=y CONFIG_DEFAULT_SECURITY_SELINUX=y CONFIG_EMBEDDED=y CONFIG_FB=y +CONFIG_HARDENED_USERCOPY=y CONFIG_HIGH_RES_TIMERS=y CONFIG_INET6_AH=y CONFIG_INET6_ESP=y @@ -135,6 +136,7 @@ CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y CONFIG_PROFILING=y CONFIG_QUOTA=y +CONFIG_RANDOMIZE_BASE=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y CONFIG_SECCOMP=y -- GitLab From 9998c6978eee4a7f5145d394ad590fb0e027b0d8 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 20 Jan 2017 17:27:44 +0530 Subject: [PATCH 1103/1442] ANDROID: kernel/configs: recommended: Enable MEMORY_STATE_TIME Enable qcom's memory state tracking driver config CONFIG_MEMORY_STATE_TIME in android-recommended.config Signed-off-by: Amit Pundir --- kernel/configs/android-recommended.config | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 96788b2ff20d..94e2b0f41ab6 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -93,6 +93,7 @@ CONFIG_LOGIRUMBLEPAD2_FF=y CONFIG_LOGITECH_FF=y CONFIG_MD=y CONFIG_MEDIA_SUPPORT=y +CONFIG_MEMORY_STATE_TIME=y CONFIG_MSDOS_FS=y CONFIG_PANIC_TIMEOUT=5 CONFIG_PANTHERLORD_FF=y -- GitLab From 580a5379c7b2269ac22f1523d041515db15e3dc9 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Tue, 20 Dec 2016 11:08:34 -0800 Subject: [PATCH 1104/1442] ANDROID: kernel/configs: base: Enable QUOTA related configs Bug: 33757366 Change-Id: Iec4f55c3ca4a16dbc8695054f481d9261c56d0f6 Signed-off-by: Amit Pundir --- kernel/configs/android-base.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config index adc552e1f9b1..4732628dcf28 100644 --- a/kernel/configs/android-base.config +++ b/kernel/configs/android-base.config @@ -135,7 +135,11 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y CONFIG_PROFILING=y +CONFIG_QFMT_V2=y CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QUOTA_TREE=y +CONFIG_QUOTACTL=y CONFIG_RANDOMIZE_BASE=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y -- GitLab From 3fa5b5beb9c63b42d163244b25a43d4e1ae05749 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 4 Jan 2017 09:11:04 -0800 Subject: [PATCH 1105/1442] ANDROID: kernel/configs: recommended: CONFIG_ARM64_SW_TTBR0_PAN=y Bug: 31432001 Change-Id: Ia72c3aa70a463d3a7f52b76e5082520aa328d29b Signed-off-by: Sami Tolvanen Signed-off-by: Amit Pundir --- kernel/configs/android-recommended.config | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 94e2b0f41ab6..919e82743da5 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -6,6 +6,7 @@ # CONFIG_NF_CONNTRACK_SIP is not set # CONFIG_PM_WAKELOCKS_GC is not set # CONFIG_VT is not set +CONFIG_ARM64_SW_TTBR0_PAN=y CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_LOOP=y -- GitLab From 52e61ad45a9fcfc1a55c6d3145bf1623b78a61c5 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Sat, 20 Sep 2008 16:50:08 +0200 Subject: [PATCH 1106/1442] ANDROID: [CPUFREQ] Don't export governors for default governor We don't need to export the governors for use as the default governor, because the default governor will be built-in anyway and we can access the symbol directly. This also fixes the following sparse warnings: drivers/cpufreq/cpufreq_conservative.c:578:25: warning: symbol 'cpufreq_gov_conservative' was not declared. Should it be static? drivers/cpufreq/cpufreq_ondemand.c:582:25: warning: symbol 'cpufreq_gov_ondemand' was not declared. Should it be static? drivers/cpufreq/cpufreq_performance.c:39:25: warning: symbol 'cpufreq_gov_performance' was not declared. Should it be static? drivers/cpufreq/cpufreq_powersave.c:38:25: warning: symbol 'cpufreq_gov_powersave' was not declared. Should it be static? drivers/cpufreq/cpufreq_userspace.c:190:25: warning: symbol 'cpufreq_gov_userspace' was not declared. Should it be static? Signed-off-by: Sven Wegener Signed-off-by: Dave Jones Signed-off-by: Andres Oportus --- drivers/cpufreq/cpufreq_conservative.c | 3 +++ drivers/cpufreq/cpufreq_performance.c | 5 ++++- drivers/cpufreq/cpufreq_powersave.c | 5 ++++- drivers/cpufreq/cpufreq_userspace.c | 5 ++++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 13475890d792..afa0364196e3 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -302,6 +302,9 @@ static void cs_start(struct cpufreq_policy *policy) dbs_info->requested_freq = policy->cur; } +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +static +#endif static struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_attrs = cs_attributes }, diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index dafb679adc58..399428e40e89 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -22,7 +22,10 @@ static void cpufreq_gov_performance_limits(struct cpufreq_policy *policy) __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); } -static struct cpufreq_governor cpufreq_gov_performance = { +#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE +static +#endif +struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .owner = THIS_MODULE, .limits = cpufreq_gov_performance_limits, diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 78a651038faf..5daa500fb0a9 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -22,7 +22,10 @@ static void cpufreq_gov_powersave_limits(struct cpufreq_policy *policy) __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); } -static struct cpufreq_governor cpufreq_gov_powersave = { +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +static +#endif +struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .limits = cpufreq_gov_powersave_limits, .owner = THIS_MODULE, diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index bd897e3e134d..765166d881bb 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -118,7 +118,10 @@ static void cpufreq_userspace_policy_limits(struct cpufreq_policy *policy) mutex_unlock(&userspace_mutex); } -static struct cpufreq_governor cpufreq_gov_userspace = { +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +static +#endif +struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .init = cpufreq_userspace_policy_init, .exit = cpufreq_userspace_policy_exit, -- GitLab From c33be5d1187b88ea5936b01a7df3dbc71ad466b0 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Thu, 17 Sep 2015 16:10:56 +0100 Subject: [PATCH 1107/1442] ANDROID: cpufreq: Frequency invariant scheduler load-tracking support Implements cpufreq_scale_freq_capacity() to provide the scheduler with a frequency scaling correction factor for more accurate load-tracking. The factor is: current_freq(cpu) << SCHED_CAPACITY_SHIFT / max_freq(cpu) In fact, freq_scale should be a struct cpufreq_policy data member. But this would require that the scheduler hot path (__update_load_avg()) would have to grab the cpufreq lock. This can be avoided by using per-cpu data initialized to SCHED_CAPACITY_SCALE for freq_scale. Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- drivers/cpufreq/cpufreq.c | 29 +++++++++++++++++++++++++++++ include/linux/cpufreq.h | 3 +++ 2 files changed, 32 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c910111621d7..6f2dae7197a2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -301,6 +301,31 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) #endif } +/********************************************************************* + * FREQUENCY INVARIANT CPU CAPACITY * + *********************************************************************/ + +static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; + +static void +scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) +{ + unsigned long cur = freqs ? freqs->new : policy->cur; + unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max; + int cpu; + + pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n", + cpumask_pr_args(policy->cpus), cur, policy->max, scale); + + for_each_cpu(cpu, policy->cpus) + per_cpu(freq_scale, cpu) = scale; +} + +unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu) +{ + return per_cpu(freq_scale, cpu); +} + static void __cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, unsigned int state) { @@ -405,6 +430,8 @@ void cpufreq_freq_transition_begin(struct cpufreq_policy *policy, spin_unlock(&policy->transition_lock); + scale_freq_capacity(policy, freqs); + cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE); } EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin); @@ -2192,6 +2219,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_NOTIFY, new_policy); + scale_freq_capacity(new_policy, NULL); + policy->min = new_policy->min; policy->max = new_policy->max; trace_cpu_frequency_limits(policy->max, policy->min, policy->cpu); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 32dc0cbd51ca..0bb2d1258869 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -886,4 +886,7 @@ unsigned int cpufreq_generic_get(unsigned int cpu); int cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency); + +struct sched_domain; +unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu); #endif /* _LINUX_CPUFREQ_H */ -- GitLab From 42083ec1dec4d6e813e4efa2dc4853b79a23a747 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Wed, 23 Sep 2015 12:47:48 +0100 Subject: [PATCH 1108/1442] ANDROID: arm: Enable frequency invariant scheduler load-tracking support Defines arch_scale_freq_capacity() to use cpufreq implementation. Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- arch/arm/include/asm/topology.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 370f7a732900..a69917b7d2c9 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -24,6 +24,11 @@ void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +#ifdef CONFIG_CPU_FREQ +#include +#define arch_scale_freq_capacity cpufreq_scale_freq_capacity +#endif + #else static inline void init_cpu_topology(void) { } -- GitLab From ea31a3e30d6caf131f3ff4a098d2722ac517953a Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 25 Sep 2015 17:15:11 +0100 Subject: [PATCH 1109/1442] ANDROID: arm64: Enable frequency invariant scheduler load-tracking support Defines arch_scale_freq_capacity() to use cpufreq implementation. Including in topology.h like for the arm arch doesn't work because of CONFIG_COMPAT=y (Kernel support for 32-bit EL0). That's why cpufreq_scale_freq_capacity() has to be declared extern in topology.h. Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- arch/arm64/include/asm/topology.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 8b57339823e9..f05b626f17c7 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -31,6 +31,11 @@ int pcibus_to_node(struct pci_bus *bus); cpumask_of_node(pcibus_to_node(bus))) #endif /* CONFIG_NUMA */ +#ifdef CONFIG_CPU_FREQ +#define arch_scale_freq_capacity cpufreq_scale_freq_capacity +struct sched_domain; +extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu); +#endif #include -- GitLab From 25cea247ff45ce1c670a02aa92ef96be860e30ee Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 14 Apr 2015 16:25:31 +0100 Subject: [PATCH 1110/1442] ANDROID: arm: Update arch_scale_cpu_capacity() to reflect change to define arch_scale_cpu_capacity() is no longer a weak function but a #define instead. Include the #define in topology.h. cc: Russell King Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- arch/arm/include/asm/topology.h | 2 ++ arch/arm/kernel/topology.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index a69917b7d2c9..e3e596cbb1a7 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -28,6 +28,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu); #include #define arch_scale_freq_capacity cpufreq_scale_freq_capacity #endif +#define arch_scale_cpu_capacity scale_cpu_capacity +extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); #else diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index ec279d161b32..53ce7f6306cc 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -42,7 +42,7 @@ */ static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; -unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) +unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) { return per_cpu(cpu_scale, cpu); } -- GitLab From b9ac00948928940dffe6f4b5a8c8dea714ce3ed7 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Sat, 9 May 2015 19:53:49 +0100 Subject: [PATCH 1111/1442] ANDROID: sched: Add cpu capacity awareness to wakeup balancing Wakeup balancing is completely unaware of cpu capacity, cpu utilization and task utilization. The task is preferably placed on a cpu which is idle in the instant the wakeup happens. New tasks (SD_BALANCE_{FORK,EXEC} are placed on an idle cpu in the idlest group if such can be found, otherwise it goes on the least loaded one. Existing tasks (SD_BALANCE_WAKE) are placed on the previous cpu or an idle cpu sharing the same last level cache unless the wakee_flips heuristic in wake_wide() decides to fallback to considering cpus outside SD_LLC. Hence existing tasks are not guaranteed to get a chance to migrate to a different group at wakeup in case the current one has reduced cpu capacity (due RT/IRQ pressure or different uarch e.g. ARM big.LITTLE). They may eventually get pulled by other cpus doing periodic/idle/nohz_idle balance, but it may take quite a while before it happens. This patch adds capacity awareness to find_idlest_{group,queue} (used by SD_BALANCE_{FORK,EXEC} and SD_BALANCE_WAKE under certain circumstances) such that groups/cpus that can accommodate the waking task based on task utilization are preferred. In addition, wakeup of existing tasks (SD_BALANCE_WAKE) is sent through find_idlest_{group,queue} also if the task doesn't fit the capacity of the previous cpu to allow it to escape (override wake_affine) when necessary instead of relying on periodic/idle/nohz_idle balance to eventually sort it out. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 91 ++++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bdb03b0104b7..b22ff748e421 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5200,6 +5200,41 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, return 1; } +static inline int task_util(struct task_struct *p) +{ + return p->se.avg.util_avg; +} + +static inline bool __task_fits(struct task_struct *p, int cpu, int util) +{ + unsigned long capacity = capacity_of(cpu); + + util += task_util(p); + + return (capacity * 1024) > (util * capacity_margin); +} + +static inline bool task_fits_max(struct task_struct *p, int cpu) +{ + unsigned long capacity = capacity_of(cpu); + unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity; + + if (capacity == max_capacity) + return true; + + if (capacity * capacity_margin > max_capacity * 1024) + return true; + + return __task_fits(p, cpu, 0); +} + +static int cpu_util(int cpu); + +static inline bool task_fits_spare(struct task_struct *p, int cpu) +{ + return __task_fits(p, cpu, cpu_util(cpu)); +} + /* * find_idlest_group finds and returns the least busy CPU group within the * domain. @@ -5209,7 +5244,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag) { struct sched_group *idlest = NULL, *group = sd->groups; + struct sched_group *fit_group = NULL; unsigned long min_load = ULONG_MAX, this_load = 0; + unsigned long fit_capacity = ULONG_MAX; int load_idx = sd->forkexec_idx; int imbalance = 100 + (sd->imbalance_pct-100)/2; @@ -5240,6 +5277,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, load = target_load(i, load_idx); avg_load += load; + + /* + * Look for most energy-efficient group that can fit + * that can fit the task. + */ + if (capacity_of(i) < fit_capacity && task_fits_spare(p, i)) { + fit_capacity = capacity_of(i); + fit_group = group; + } } /* Adjust by relative CPU capacity of the group */ @@ -5253,6 +5299,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, } } while (group = group->next, group != sd->groups); + if (fit_group) + return fit_group; + if (!idlest || 100*this_load < imbalance*min_load) return NULL; return idlest; @@ -5277,7 +5326,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) /* Traverse only the allowed CPUs */ for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { - if (idle_cpu(i)) { + if (task_fits_spare(p, i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); if (idle && idle->exit_latency < min_exit_latency) { @@ -5289,7 +5338,8 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) min_exit_latency = idle->exit_latency; latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; - } else if ((!idle || idle->exit_latency == min_exit_latency) && + } else if (idle_cpu(i) && + (!idle || idle->exit_latency == min_exit_latency) && rq->idle_stamp > latest_idle_timestamp) { /* * If equal or no active idle state, then @@ -5298,6 +5348,13 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) */ latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; + } else if (shallowest_idle_cpu == -1) { + /* + * If we haven't found an idle CPU yet + * pick a non-idle one that can fit the task as + * fallback. + */ + shallowest_idle_cpu = i; } } else if (shallowest_idle_cpu == -1) { load = weighted_cpuload(i); @@ -5585,32 +5642,6 @@ static int cpu_util(int cpu) return (util >= capacity) ? capacity : util; } -static inline int task_util(struct task_struct *p) -{ - return p->se.avg.util_avg; -} - -/* - * Disable WAKE_AFFINE in the case where task @p doesn't fit in the - * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu. - * - * In that case WAKE_AFFINE doesn't make sense and we'll let - * BALANCE_WAKE sort things out. - */ -static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) -{ - long min_cap, max_cap; - - min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu)); - max_cap = cpu_rq(cpu)->rd->max_cpu_capacity; - - /* Minimum capacity is close to max, no need to abort wake_affine */ - if (max_cap - min_cap < max_cap >> 3) - return 0; - - return min_cap * 1024 < task_util(p) * capacity_margin; -} - /* * select_task_rq_fair: Select target runqueue for the waking task in domains * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, @@ -5634,8 +5665,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); - want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) - && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + want_affine = !wake_wide(p) && task_fits_max(p, cpu) && + cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); } rcu_read_lock(); -- GitLab From de9b6366687bb2863bd27c87c3bfd1a91a78ac9c Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Mon, 6 Jul 2015 15:01:10 +0100 Subject: [PATCH 1112/1442] ANDROID: sched: Consider spare cpu capacity at task wake-up find_idlest_group() selects the wake-up target group purely based on group load which leads to suboptimal choices in low load scenarios. An idle group with reduced capacity (due to RT tasks or different cpu type) isn't necessarily a better target than a lightly loaded group with higher capacity. The patch adds spare capacity as an additional group selection parameter. The target group is now selected based on the following criteria: 1. Return the group with the cpu with most spare capacity and this capacity is significant if such group exists. Significant spare capacity is currently at least 20% to spare. 2. Return the group with the lowest load, unless it is the local group in which case NULL is returned and the search is continued at the next (lower) level. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b22ff748e421..0ce3c6aec175 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5244,9 +5244,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag) { struct sched_group *idlest = NULL, *group = sd->groups; - struct sched_group *fit_group = NULL; + struct sched_group *fit_group = NULL, *spare_group = NULL; unsigned long min_load = ULONG_MAX, this_load = 0; unsigned long fit_capacity = ULONG_MAX; + unsigned long max_spare_capacity = capacity_margin - SCHED_CAPACITY_SCALE; int load_idx = sd->forkexec_idx; int imbalance = 100 + (sd->imbalance_pct-100)/2; @@ -5254,7 +5255,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, load_idx = sd->wake_idx; do { - unsigned long load, avg_load; + unsigned long load, avg_load, spare_capacity; int local_group; int i; @@ -5286,6 +5287,16 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, fit_capacity = capacity_of(i); fit_group = group; } + + /* + * Look for group which has most spare capacity on a + * single cpu. + */ + spare_capacity = capacity_of(i) - cpu_util(i); + if (spare_capacity > max_spare_capacity) { + max_spare_capacity = spare_capacity; + spare_group = group; + } } /* Adjust by relative CPU capacity of the group */ @@ -5302,6 +5313,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, if (fit_group) return fit_group; + if (spare_group) + return spare_group; + if (!idlest || 100*this_load < imbalance*min_load) return NULL; return idlest; -- GitLab From 90f309fba3d54fed8ab6319143103cee34f60c0c Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 26 Jan 2015 19:47:28 +0000 Subject: [PATCH 1113/1442] ANDROID: sched: Enable idle balance to pull single task towards cpu with higher capacity We do not want to miss out on the ability to pull a single remaining task from a potential source cpu towards an idle destination cpu. Add an extra criteria to need_active_balance() to kick off active load balance if the source cpu is over-utilized and has lower capacity than the destination cpu. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0ce3c6aec175..dc4addbabf0a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5235,6 +5235,11 @@ static inline bool task_fits_spare(struct task_struct *p, int cpu) return __task_fits(p, cpu, cpu_util(cpu)); } +static bool cpu_overutilized(int cpu) +{ + return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); +} + /* * find_idlest_group finds and returns the least busy CPU group within the * domain. @@ -7679,6 +7684,13 @@ static int need_active_balance(struct lb_env *env) return 1; } + if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) && + env->src_rq->cfs.h_nr_running == 1 && + cpu_overutilized(env->src_cpu) && + !cpu_overutilized(env->dst_cpu)) { + return 1; + } + return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); } -- GitLab From 94beeae886ccc509dacebed4ffe91f4691ca4179 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Thu, 2 Jul 2015 17:16:34 +0100 Subject: [PATCH 1114/1442] ANDROID: sched: Prevent unnecessary active balance of single task in sched group Scenarios with the busiest group having just one task and the local being idle on topologies with sched groups with different numbers of cpus manage to dodge all load-balance bailout conditions resulting the nr_balance_failed counter to be incremented. This eventually causes a pointless active migration of the task. This patch prevents this by not incrementing the counter when the busiest group only has one task. ASYM_PACKING migrations and migrations due to reduced capacity should still take place as these are explicitly captured by need_active_balance(). A better solution would be to not attempt the load-balance in the first place, but that requires significant changes to the order of bailout conditions and statistics gathering. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dc4addbabf0a..225ececc1e93 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6311,6 +6311,7 @@ struct lb_env { int new_dst_cpu; enum cpu_idle_type idle; long imbalance; + unsigned int src_grp_nr_running; /* The set of CPUs under consideration for load-balancing */ struct cpumask *cpus; @@ -7281,6 +7282,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd if (env->sd->flags & SD_NUMA) env->fbq_type = fbq_classify_group(&sds->busiest_stat); + env->src_grp_nr_running = sds->busiest_stat.sum_nr_running; + if (!env->sd->parent) { /* update overload indicator if we are at root domain */ if (env->dst_rq->rd->overload != overload) @@ -7903,7 +7906,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, * excessive cache_hot migrations and active balances. */ if (idle != CPU_NEWLY_IDLE) - sd->nr_balance_failed++; + if (env.src_grp_nr_running > 1) + sd->nr_balance_failed++; if (need_active_balance(&env)) { raw_spin_lock_irqsave(&busiest->lock, flags); -- GitLab From 577dacab7046ffd093e64d5b3bfdf266e1a8ed81 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 13 Jan 2015 13:43:28 +0000 Subject: [PATCH 1115/1442] ANDROID: sched: Documentation for scheduler energy cost model This documentation patch provides an overview of the experimental scheduler energy costing model, associated data structures, and a reference recipe on how platforms can be characterized to derive energy models. Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- Documentation/scheduler/sched-energy.txt | 362 +++++++++++++++++++++++ 1 file changed, 362 insertions(+) create mode 100644 Documentation/scheduler/sched-energy.txt diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt new file mode 100644 index 000000000000..dab2f9088b33 --- /dev/null +++ b/Documentation/scheduler/sched-energy.txt @@ -0,0 +1,362 @@ +Energy cost model for energy-aware scheduling (EXPERIMENTAL) + +Introduction +============= + +The basic energy model uses platform energy data stored in sched_group_energy +data structures attached to the sched_groups in the sched_domain hierarchy. The +energy cost model offers two functions that can be used to guide scheduling +decisions: + +1. static unsigned int sched_group_energy(struct energy_env *eenv) +2. static int energy_diff(struct energy_env *eenv) + +sched_group_energy() estimates the energy consumed by all cpus in a specific +sched_group including any shared resources owned exclusively by this group of +cpus. Resources shared with other cpus are excluded (e.g. later level caches). + +energy_diff() estimates the total energy impact of a utilization change. That +is, adding, removing, or migrating utilization (tasks). + +Both functions use a struct energy_env to specify the scenario to be evaluated: + + struct energy_env { + struct sched_group *sg_top; + struct sched_group *sg_cap; + int cap_idx; + int util_delta; + int src_cpu; + int dst_cpu; + int energy; + }; + +sg_top: sched_group to be evaluated. Not used by energy_diff(). + +sg_cap: sched_group covering the cpus in the same frequency domain. Set by +sched_group_energy(). + +cap_idx: Capacity state to be used for energy calculations. Set by +find_new_capacity(). + +util_delta: Amount of utilization to be added, removed, or migrated. + +src_cpu: Source cpu from where 'util_delta' utilization is removed. Should be +-1 if no source (e.g. task wake-up). + +dst_cpu: Destination cpu where 'util_delta' utilization is added. Should be -1 +if utilization is removed (e.g. terminating tasks). + +energy: Result of sched_group_energy(). + +The metric used to represent utilization is the actual per-entity running time +averaged over time using a geometric series. Very similar to the existing +per-entity load-tracking, but _not_ scaled by task priority and capped by the +capacity of the cpu. The latter property does mean that utilization may +underestimate the compute requirements for task on fully/over utilized cpus. +The greatest potential for energy savings without affecting performance too much +is scenarios where the system isn't fully utilized. If the system is deemed +fully utilized load-balancing should be done with task load (includes task +priority) instead in the interest of fairness and performance. + + +Background and Terminology +=========================== + +To make it clear from the start: + +energy = [joule] (resource like a battery on powered devices) +power = energy/time = [joule/second] = [watt] + +The goal of energy-aware scheduling is to minimize energy, while still getting +the job done. That is, we want to maximize: + + performance [inst/s] + -------------------- + power [W] + +which is equivalent to minimizing: + + energy [J] + ----------- + instruction + +while still getting 'good' performance. It is essentially an alternative +optimization objective to the current performance-only objective for the +scheduler. This alternative considers two objectives: energy-efficiency and +performance. Hence, there needs to be a user controllable knob to switch the +objective. Since it is early days, this is currently a sched_feature +(ENERGY_AWARE). + +The idea behind introducing an energy cost model is to allow the scheduler to +evaluate the implications of its decisions rather than applying energy-saving +techniques blindly that may only have positive effects on some platforms. At +the same time, the energy cost model must be as simple as possible to minimize +the scheduler latency impact. + +Platform topology +------------------ + +The system topology (cpus, caches, and NUMA information, not peripherals) is +represented in the scheduler by the sched_domain hierarchy which has +sched_groups attached at each level that covers one or more cpus (see +sched-domains.txt for more details). To add energy awareness to the scheduler +we need to consider power and frequency domains. + +Power domain: + +A power domain is a part of the system that can be powered on/off +independently. Power domains are typically organized in a hierarchy where you +may be able to power down just a cpu or a group of cpus along with any +associated resources (e.g. shared caches). Powering up a cpu means that all +power domains it is a part of in the hierarchy must be powered up. Hence, it is +more expensive to power up the first cpu that belongs to a higher level power +domain than powering up additional cpus in the same high level domain. Two +level power domain hierarchy example: + + Power source + +-------------------------------+----... +per group PD G G + | +----------+ | + +--------+-------| Shared | (other groups) +per-cpu PD G G | resource | + | | +----------+ + +-------+ +-------+ + | CPU 0 | | CPU 1 | + +-------+ +-------+ + +Frequency domain: + +Frequency domains (P-states) typically cover the same group of cpus as one of +the power domain levels. That is, there might be several smaller power domains +sharing the same frequency (P-state) or there might be a power domain spanning +multiple frequency domains. + +From a scheduling point of view there is no need to know the actual frequencies +[Hz]. All the scheduler cares about is the compute capacity available at the +current state (P-state) the cpu is in and any other available states. For that +reason, and to also factor in any cpu micro-architecture differences, compute +capacity scaling states are called 'capacity states' in this document. For SMP +systems this is equivalent to P-states. For mixed micro-architecture systems +(like ARM big.LITTLE) it is P-states scaled according to the micro-architecture +performance relative to the other cpus in the system. + +Energy modelling: +------------------ + +Due to the hierarchical nature of the power domains, the most obvious way to +model energy costs is therefore to associate power and energy costs with +domains (groups of cpus). Energy costs of shared resources are associated with +the group of cpus that share the resources, only the cost of powering the +cpu itself and any private resources (e.g. private L1 caches) is associated +with the per-cpu groups (lowest level). + +For example, for an SMP system with per-cpu power domains and a cluster level +(group of cpus) power domain we get the overall energy costs to be: + + energy = energy_cluster + n * energy_cpu + +where 'n' is the number of cpus powered up and energy_cluster is the cost paid +as soon as any cpu in the cluster is powered up. + +The power and frequency domains can naturally be mapped onto the existing +sched_domain hierarchy and sched_groups by adding the necessary data to the +existing data structures. + +The energy model considers energy consumption from two contributors (shown in +the illustration below): + +1. Busy energy: Energy consumed while a cpu and the higher level groups that it +belongs to are busy running tasks. Busy energy is associated with the state of +the cpu, not an event. The time the cpu spends in this state varies. Thus, the +most obvious platform parameter for this contribution is busy power +(energy/time). + +2. Idle energy: Energy consumed while a cpu and higher level groups that it +belongs to are idle (in a C-state). Like busy energy, idle energy is associated +with the state of the cpu. Thus, the platform parameter for this contribution +is idle power (energy/time). + +Energy consumed during transitions from an idle-state (C-state) to a busy state +(P-state) or going the other way is ignored by the model to simplify the energy +model calculations. + + + Power + ^ + | busy->idle idle->busy + | transition transition + | + | _ __ + | / \ / \__________________ + |______________/ \ / + | \ / + | Busy \ Idle / Busy + | low P-state \____________/ high P-state + | + +------------------------------------------------------------> time + +Busy |--------------| |-----------------| + +Wakeup |------| |------| + +Idle |------------| + + +The basic algorithm +==================== + +The basic idea is to determine the total energy impact when utilization is +added or removed by estimating the impact at each level in the sched_domain +hierarchy starting from the bottom (sched_group contains just a single cpu). +The energy cost comes from busy time (sched_group is awake because one or more +cpus are busy) and idle time (in an idle-state). Energy model numbers account +for energy costs associated with all cpus in the sched_group as a group. + + for_each_domain(cpu, sd) { + sg = sched_group_of(cpu) + energy_before = curr_util(sg) * busy_power(sg) + + (1-curr_util(sg)) * idle_power(sg) + energy_after = new_util(sg) * busy_power(sg) + + (1-new_util(sg)) * idle_power(sg) + energy_diff += energy_before - energy_after + + } + + return energy_diff + +{curr, new}_util: The cpu utilization at the lowest level and the overall +non-idle time for the entire group for higher levels. Utilization is in the +range 0.0 to 1.0 in the pseudo-code. + +busy_power: The power consumption of the sched_group. + +idle_power: The power consumption of the sched_group when idle. + +Note: It is a fundamental assumption that the utilization is (roughly) scale +invariant. Task utilization tracking factors in any frequency scaling and +performance scaling differences due to difference cpu microarchitectures such +that task utilization can be used across the entire system. + + +Platform energy data +===================== + +struct sched_group_energy can be attached to sched_groups in the sched_domain +hierarchy and has the following members: + +cap_states: + List of struct capacity_state representing the supported capacity states + (P-states). struct capacity_state has two members: cap and power, which + represents the compute capacity and the busy_power of the state. The + list must be ordered by capacity low->high. + +nr_cap_states: + Number of capacity states in cap_states list. + +idle_states: + List of struct idle_state containing idle_state power cost for each + idle-state supported by the system orderd by shallowest state first. + All states must be included at all level in the hierarchy, i.e. a + sched_group spanning just a single cpu must also include coupled + idle-states (cluster states). In addition to the cpuidle idle-states, + the list must also contain an entry for the idling using the arch + default idle (arch_idle_cpu()). Despite this state may not be a true + hardware idle-state it is considered the shallowest idle-state in the + energy model and must be the first entry. cpus may enter this state + (possibly 'active idling') if cpuidle decides not enter a cpuidle + idle-state. Default idle may not be used when cpuidle is enabled. + In this case, it should just be a copy of the first cpuidle idle-state. + +nr_idle_states: + Number of idle states in idle_states list. + +There are no unit requirements for the energy cost data. Data can be normalized +with any reference, however, the normalization must be consistent across all +energy cost data. That is, one bogo-joule/watt must be the same quantity for +data, but we don't care what it is. + +A recipe for platform characterization +======================================= + +Obtaining the actual model data for a particular platform requires some way of +measuring power/energy. There isn't a tool to help with this (yet). This +section provides a recipe for use as reference. It covers the steps used to +characterize the ARM TC2 development platform. This sort of measurements is +expected to be done anyway when tuning cpuidle and cpufreq for a given +platform. + +The energy model needs two types of data (struct sched_group_energy holds +these) for each sched_group where energy costs should be taken into account: + +1. Capacity state information + +A list containing the compute capacity and power consumption when fully +utilized attributed to the group as a whole for each available capacity state. +At the lowest level (group contains just a single cpu) this is the power of the +cpu alone without including power consumed by resources shared with other cpus. +It basically needs to fit the basic modelling approach described in "Background +and Terminology" section: + + energy_system = energy_shared + n * energy_cpu + +for a system containing 'n' busy cpus. Only 'energy_cpu' should be included at +the lowest level. 'energy_shared' is included at the next level which +represents the group of cpus among which the resources are shared. + +This model is, of course, a simplification of reality. Thus, power/energy +attributions might not always exactly represent how the hardware is designed. +Also, busy power is likely to depend on the workload. It is therefore +recommended to use a representative mix of workloads when characterizing the +capacity states. + +If the group has no capacity scaling support, the list will contain a single +state where power is the busy power attributed to the group. The capacity +should be set to a default value (1024). + +When frequency domains include multiple power domains, the group representing +the frequency domain and all child groups share capacity states. This must be +indicated by setting the SD_SHARE_CAP_STATES sched_domain flag. All groups at +all levels that share the capacity state must have the list of capacity states +with the power set to the contribution of the individual group. + +2. Idle power information + +Stored in the idle_states list. The power number is the group idle power +consumption in each idle state as well when the group is idle but has not +entered an idle-state ('active idle' as mentioned earlier). Due to the way the +energy model is defined, the idle power of the deepest group idle state can +alternatively be accounted for in the parent group busy power. In that case the +group idle state power values are offset such that the idle power of the +deepest state is zero. It is less intuitive, but it is easier to measure as +idle power consumed by the group and the busy/idle power of the parent group +cannot be distinguished without per group measurement points. + +Measuring capacity states and idle power: + +The capacity states' capacity and power can be estimated by running a benchmark +workload at each available capacity state. By restricting the benchmark to run +on subsets of cpus it is possible to extrapolate the power consumption of +shared resources. + +ARM TC2 has two clusters of two and three cpus respectively. Each cluster has a +shared L2 cache. TC2 has on-chip energy counters per cluster. Running a +benchmark workload on just one cpu in a cluster means that power is consumed in +the cluster (higher level group) and a single cpu (lowest level group). Adding +another benchmark task to another cpu increases the power consumption by the +amount consumed by the additional cpu. Hence, it is possible to extrapolate the +cluster busy power. + +For platforms that don't have energy counters or equivalent instrumentation +built-in, it may be possible to use an external DAQ to acquire similar data. + +If the benchmark includes some performance score (for example sysbench cpu +benchmark), this can be used to record the compute capacity. + +Measuring idle power requires insight into the idle state implementation on the +particular platform. Specifically, if the platform has coupled idle-states (or +package states). To measure non-coupled per-cpu idle-states it is necessary to +keep one cpu busy to keep any shared resources alive to isolate the idle power +of the cpu from idle/busy power of the shared resources. The cpu can be tricked +into different per-cpu idle states by disabling the other states. Based on +various combinations of measurements with specific cpus busy and disabling +idle-states it is possible to extrapolate the idle-state power. -- GitLab From 94c4cea62463e122108231bad9b03110b42337e0 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 13 Jan 2015 13:45:51 +0000 Subject: [PATCH 1116/1442] ANDROID: sched: Make energy awareness a sched feature This patch introduces the ENERGY_AWARE sched feature, which is implemented using jump labels when SCHED_DEBUG is defined. It is statically set false when SCHED_DEBUG is not defined. Hence this doesn't allow energy awareness to be enabled without SCHED_DEBUG. This sched_feature knob will be replaced later with a more appropriate control knob when things have matured a bit. ENERGY_AWARE is based on per-entity load-tracking hence FAIR_GROUP_SCHED must be enable. This dependency isn't checked at compile time yet. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- include/linux/sched.h | 19 +++++++++++++++++++ kernel/sched/fair.c | 5 +++++ kernel/sched/features.h | 5 +++++ kernel/sched/sched.h | 1 + 4 files changed, 30 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index e9c009dc3a4a..1a903e48b92b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1067,6 +1067,22 @@ struct sched_domain_attr { extern int sched_domain_level_max; +struct capacity_state { + unsigned long cap; /* compute capacity */ + unsigned long power; /* power consumption at this compute capacity */ +}; + +struct idle_state { + unsigned long power; /* power consumption in this idle state */ +}; + +struct sched_group_energy { + unsigned int nr_idle_states; /* number of idle states */ + struct idle_state *idle_states; /* ptr to idle state array */ + unsigned int nr_cap_states; /* number of capacity states */ + struct capacity_state *cap_states; /* ptr to capacity state array */ +}; + struct sched_group; struct sched_domain_shared { @@ -1174,6 +1190,8 @@ bool cpus_share_cache(int this_cpu, int that_cpu); typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); typedef int (*sched_domain_flags_f)(void); +typedef +const struct sched_group_energy * const(*sched_domain_energy_f)(int cpu); #define SDTL_OVERLAP 0x01 @@ -1187,6 +1205,7 @@ struct sd_data { struct sched_domain_topology_level { sched_domain_mask_f mask; sched_domain_flags_f sd_flags; + sched_domain_energy_f energy; int flags; int numa_level; struct sd_data data; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 225ececc1e93..135be535a4ef 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5104,6 +5104,11 @@ static void record_wakee(struct task_struct *p) } } +static inline bool energy_aware(void) +{ + return sched_feat(ENERGY_AWARE); +} + /* * Detect M:N waker/wakee relationships via a switching-frequency heuristic. * diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f..b634151ce286 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -69,3 +69,8 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) SCHED_FEAT(ATTACH_AGE_LOAD, true) +/* + * Energy aware scheduling. Use platform energy model to guide scheduling + * decisions optimizing for energy efficiency. + */ +SCHED_FEAT(ENERGY_AWARE, false) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 055f935d4421..be18b1234d6d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -905,6 +905,7 @@ struct sched_group { unsigned int group_weight; struct sched_group_capacity *sgc; + const struct sched_group_energy const *sge; /* * The CPUs this group covers. -- GitLab From dd23c09a5af6037b213503bfd99f5d826c7fe406 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 14 Nov 2014 16:20:20 +0000 Subject: [PATCH 1117/1442] ANDROID: sched: Initialize energy data structures The sched_group_energy (sge) pointer of the first sched_group (sg) in the sched_domain (sd) is initialized to point to the appropriate (in terms of sd level and cpu) sge data defined in the arch and so to the correct part of the Energy Model (EM). Energy-aware scheduling allows that a system has only EM data up to a certain sd level (so called highest energy aware balancing sd level). A check in init_sched_energy() enforces that all sd's below this sd level contain EM data. The 'int cpu' parameter of sched_domain_energy_f requires that check_sched_energy_data() makes sure that all cpus spanned by a sg are provisioned with the same EM data. This patch has also been tested with feature FORCE_SD_OVERLAP enabled. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- kernel/sched/core.c | 65 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 79e5e07d3a2c..8d30289b8cea 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6311,6 +6311,66 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) update_group_capacity(sd, cpu); } +/* + * Check that the per-cpu provided sd energy data is consistent for all cpus + * within the mask. + */ +static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn, + const struct cpumask *cpumask) +{ + const struct sched_group_energy * const sge = fn(cpu); + struct cpumask mask; + int i; + + if (cpumask_weight(cpumask) <= 1) + return; + + cpumask_xor(&mask, cpumask, get_cpu_mask(cpu)); + + for_each_cpu(i, &mask) { + const struct sched_group_energy * const e = fn(i); + int y; + + BUG_ON(e->nr_idle_states != sge->nr_idle_states); + + for (y = 0; y < (e->nr_idle_states); y++) { + BUG_ON(e->idle_states[y].power != + sge->idle_states[y].power); + } + + BUG_ON(e->nr_cap_states != sge->nr_cap_states); + + for (y = 0; y < (e->nr_cap_states); y++) { + BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap); + BUG_ON(e->cap_states[y].power != + sge->cap_states[y].power); + } + } +} + +static void init_sched_energy(int cpu, struct sched_domain *sd, + sched_domain_energy_f fn) +{ + if (!(fn && fn(cpu))) + return; + + if (cpu != group_balance_cpu(sd->groups)) + return; + + if (sd->child && !sd->child->groups->sge) { + pr_err("BUG: EAS setup broken for CPU%d\n", cpu); +#ifdef CONFIG_SCHED_DEBUG + pr_err(" energy data on %s but not on %s domain\n", + sd->name, sd->child->name); +#endif + return; + } + + check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups)); + + sd->groups->sge = fn(cpu); +} + /* * Initializers for schedule domains * Non-inlined to reduce accumulated stack pressure in build_sched_domains() @@ -7036,10 +7096,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, /* Calculate CPU capacity for physical packages and nodes */ for (i = nr_cpumask_bits-1; i >= 0; i--) { + struct sched_domain_topology_level *tl = sched_domain_topology; + if (!cpumask_test_cpu(i, cpu_map)) continue; - for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { + for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) { + init_sched_energy(i, sd, tl->energy); claim_allocations(i, sd); init_sched_groups_capacity(i, sd); } -- GitLab From 858d718840feb4d668847edbd358abc3fe5867ca Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 13 Jan 2015 13:50:46 +0000 Subject: [PATCH 1118/1442] ANDROID: sched: Introduce SD_SHARE_CAP_STATES sched_domain flag cpufreq is currently keeping it a secret which cpus are sharing clock source. The scheduler needs to know about clock domains as well to become more energy aware. The SD_SHARE_CAP_STATES domain flag indicates whether cpus belonging to the sched_domain share capacity states (P-states). There is no connection with cpufreq (yet). The flag must be set by the arch specific topology code. cc: Russell King cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- arch/arm/kernel/topology.c | 3 ++- include/linux/sched.h | 1 + kernel/sched/core.c | 10 +++++++--- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 53ce7f6306cc..489e63a2e1df 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -277,7 +277,8 @@ void store_cpu_topology(unsigned int cpuid) static inline int cpu_corepower_flags(void) { - return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; + return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \ + SD_SHARE_CAP_STATES; } static struct sched_domain_topology_level arm_topology[] = { diff --git a/include/linux/sched.h b/include/linux/sched.h index 1a903e48b92b..211de1d8fab9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1035,6 +1035,7 @@ extern void wake_up_q(struct wake_q_head *head); #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ #define SD_NUMA 0x4000 /* cross-node balancing */ +#define SD_SHARE_CAP_STATES 0x8000 /* Domain members share capacity state */ #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8d30289b8cea..ab072a5d8c15 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5771,7 +5771,8 @@ static int sd_degenerate(struct sched_domain *sd) SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | - SD_SHARE_POWERDOMAIN)) { + SD_SHARE_POWERDOMAIN | + SD_SHARE_CAP_STATES)) { if (sd->groups != sd->groups->next) return 0; } @@ -5804,7 +5805,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_PREFER_SIBLING | - SD_SHARE_POWERDOMAIN); + SD_SHARE_POWERDOMAIN | + SD_SHARE_CAP_STATES); if (nr_node_ids == 1) pflags &= ~SD_SERIALIZE; } @@ -6487,6 +6489,7 @@ static int sched_domains_curr_level; * SD_NUMA - describes NUMA topologies * SD_SHARE_POWERDOMAIN - describes shared power domain * SD_ASYM_CPUCAPACITY - describes mixed capacity topologies + * SD_SHARE_CAP_STATES - describes shared capacity states * * Odd one out, which beside describing the topology has a quirk also * prescribes the desired behaviour that goes along with it: @@ -6499,7 +6502,8 @@ static int sched_domains_curr_level; SD_NUMA | \ SD_ASYM_PACKING | \ SD_ASYM_CPUCAPACITY | \ - SD_SHARE_POWERDOMAIN) + SD_SHARE_POWERDOMAIN | \ + SD_SHARE_CAP_STATES) static struct sched_domain * sd_init(struct sched_domain_topology_level *tl, -- GitLab From b4ca4bcfe1c4194a7c19c265bbbd4b0a0e095fcb Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 10 Jul 2015 13:57:19 +0100 Subject: [PATCH 1119/1442] ANDROID: arm: Cpu invariant scheduler load-tracking and capacity support Provides the scheduler with a cpu scaling correction factor for more accurate load-tracking and cpu capacity handling. The Energy Model (EM) (in fact the capacity value of the last element of the capacity states vector of the core (MC) level sched_group_energy structure) is used instead of the arm arch specific cpu_efficiency and dtb property 'clock-frequency' values as the source for this cpu scaling factor. The cpu capacity value depends on the micro-architecture and the maximum frequency of the cpu. The maximum frequency part should not be confused with the frequency invariant scheduler load-tracking support which deals with frequency related scaling due to DFVS functionality. Signed-off-by: Juri Lelli Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- arch/arm/kernel/topology.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 489e63a2e1df..b2cefdf56536 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -153,6 +153,8 @@ static void __init parse_dt_topology(void) } +static const struct sched_group_energy * const cpu_core_energy(int cpu); + /* * Look for a customed capacity of a CPU in the cpu_capacity table during the * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the @@ -160,10 +162,14 @@ static void __init parse_dt_topology(void) */ static void update_cpu_capacity(unsigned int cpu) { - if (!cpu_capacity(cpu)) - return; + unsigned long capacity = SCHED_CAPACITY_SCALE; + + if (cpu_core_energy(cpu)) { + int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1; + capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap; + } - set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); + set_capacity_scale(cpu, capacity); pr_info("CPU%u: update cpu_capacity %lu\n", cpu, arch_scale_cpu_capacity(NULL, cpu)); -- GitLab From 5e900d488428a6ac0b43dee7d2f1b79de0725cee Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 30 Apr 2015 11:53:48 +0100 Subject: [PATCH 1120/1442] ANDROID: arm64: Cpu invariant scheduler load-tracking and capacity support Provides the scheduler with a cpu scaling correction factor for more accurate load-tracking and cpu capacity handling. The Energy Model (EM) (in fact the capacity value of the last element of the capacity states vector of the core (MC) level sched_group_energy structure) is used as the source for this cpu scaling factor. The cpu capacity value depends on the micro-architecture and the maximum frequency of the cpu. The maximum frequency part should not be confused with the frequency invariant scheduler load-tracking support which deals with frequency related scaling due to DFVS functionality. Signed-off-by: Juri Lelli Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- arch/arm64/include/asm/topology.h | 4 ++- arch/arm64/kernel/topology.c | 42 +++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index f05b626f17c7..237eaa0d5cf7 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -31,11 +31,13 @@ int pcibus_to_node(struct pci_bus *bus); cpumask_of_node(pcibus_to_node(bus))) #endif /* CONFIG_NUMA */ +struct sched_domain; #ifdef CONFIG_CPU_FREQ #define arch_scale_freq_capacity cpufreq_scale_freq_capacity -struct sched_domain; extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu); #endif +#define arch_scale_cpu_capacity scale_cpu_capacity +extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); #include diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 694f6deedbab..fb99a6735fd4 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -23,6 +23,18 @@ #include #include +static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + +unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) +{ + per_cpu(cpu_scale, cpu) = capacity; +} + static int __init get_cpu_for_node(struct device_node *node) { struct device_node *cpu_node; @@ -211,6 +223,35 @@ const struct cpumask *cpu_coregroup_mask(int cpu) return &cpu_topology[cpu].core_sibling; } +static inline int cpu_corepower_flags(void) +{ + return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \ + SD_SHARE_CAP_STATES; +} + +static struct sched_domain_topology_level arm64_topology[] = { +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +static void update_cpu_capacity(unsigned int cpu) +{ + unsigned long capacity = SCHED_CAPACITY_SCALE; + + if (cpu_core_energy(cpu)) { + int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1; + capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap; + } + + set_capacity_scale(cpu, capacity); + + pr_info("CPU%d: update cpu_capacity %lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); +} + static void update_siblings_masks(unsigned int cpuid) { struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; @@ -272,6 +313,7 @@ void store_cpu_topology(unsigned int cpuid) topology_populated: update_siblings_masks(cpuid); + update_cpu_capacity(cpuid); } static void __init reset_cpu_topology(void) -- GitLab From 4f569991b1d9660ba6eda4b43f82674ca46a089a Mon Sep 17 00:00:00 2001 From: Robin Randhawa Date: Tue, 9 Jun 2015 15:10:00 +0100 Subject: [PATCH 1121/1442] ANDROID: arm64, topology: Updates to use DT bindings for EAS costing data With the bindings and the associated accessors to extract data from the bindings in place, remove the static hard-coded data from topology.c and use the accesors instead. Signed-off-by: Robin Randhawa Signed-off-by: Andres Oportus --- arch/arm64/kernel/topology.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index fb99a6735fd4..b5b43af6a7dc 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -218,6 +220,33 @@ static int __init parse_dt_topology(void) struct cpu_topology cpu_topology[NR_CPUS]; EXPORT_SYMBOL_GPL(cpu_topology); +/* sd energy functions */ +static inline +const struct sched_group_energy * const cpu_cluster_energy(int cpu) +{ + struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL1]; + + if (!sge) { + pr_warn("Invalid sched_group_energy for Cluster%d\n", cpu); + return NULL; + } + + return sge; +} + +static inline +const struct sched_group_energy * const cpu_core_energy(int cpu) +{ + struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL0]; + + if (!sge) { + pr_warn("Invalid sched_group_energy for CPU%d\n", cpu); + return NULL; + } + + return sge; +} + const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_topology[cpu].core_sibling; @@ -344,4 +373,8 @@ void __init init_cpu_topology(void) */ if (of_have_populated_dt() && parse_dt_topology()) reset_cpu_topology(); + else + set_sched_topology(arm64_topology); + + init_sched_energy_costs(); } -- GitLab From 1f998b35937975a018f9a3cdc00c0fd95155ffb7 Mon Sep 17 00:00:00 2001 From: Robin Randhawa Date: Mon, 29 Jun 2015 18:01:58 +0100 Subject: [PATCH 1122/1442] ANDROID: sched: Support for extracting EAS energy costs from DT This patch implements support for extracting energy cost data from DT. The data should conform to the DT bindings for energy cost data needed by EAS (energy aware scheduling). Signed-off-by: Robin Randhawa Signed-off-by: Andres Oportus --- include/linux/sched_energy.h | 36 ++++++++++ kernel/sched/Makefile | 2 +- kernel/sched/energy.c | 124 +++++++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched_energy.h create mode 100644 kernel/sched/energy.c diff --git a/include/linux/sched_energy.h b/include/linux/sched_energy.h new file mode 100644 index 000000000000..a3f1627ac609 --- /dev/null +++ b/include/linux/sched_energy.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_SCHED_ENERGY_H +#define _LINUX_SCHED_ENERGY_H + +#include +#include + +/* + * There doesn't seem to be an NR_CPUS style max number of sched domain + * levels so here's an arbitrary constant one for the moment. + * + * The levels alluded to here correspond to entries in struct + * sched_domain_topology_level that are meant to be populated by arch + * specific code (topology.c). + */ +#define NR_SD_LEVELS 8 + +#define SD_LEVEL0 0 +#define SD_LEVEL1 1 +#define SD_LEVEL2 2 +#define SD_LEVEL3 3 +#define SD_LEVEL4 4 +#define SD_LEVEL5 5 +#define SD_LEVEL6 6 +#define SD_LEVEL7 7 + +/* + * Convenience macro for iterating through said sd levels. + */ +#define for_each_possible_sd_level(level) \ + for (level = 0; level < NR_SD_LEVELS; level++) + +extern struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS]; + +void init_sched_energy_costs(void); + +#endif diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 5e59b832ae2b..8fdb2850564d 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -16,7 +16,7 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer endif obj-y += core.o loadavg.o clock.o cputime.o -obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o +obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o energy.o obj-y += wait.o swait.o completion.o idle.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o diff --git a/kernel/sched/energy.c b/kernel/sched/energy.c new file mode 100644 index 000000000000..b0656b7a93e3 --- /dev/null +++ b/kernel/sched/energy.c @@ -0,0 +1,124 @@ +/* + * Obtain energy cost data from DT and populate relevant scheduler data + * structures. + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#define pr_fmt(fmt) "sched-energy: " fmt + +#define DEBUG + +#include +#include +#include +#include +#include +#include + +struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS]; + +static void free_resources(void) +{ + int cpu, sd_level; + struct sched_group_energy *sge; + + for_each_possible_cpu(cpu) { + for_each_possible_sd_level(sd_level) { + sge = sge_array[cpu][sd_level]; + if (sge) { + kfree(sge->cap_states); + kfree(sge->idle_states); + kfree(sge); + } + } + } +} + +void init_sched_energy_costs(void) +{ + struct device_node *cn, *cp; + struct capacity_state *cap_states; + struct idle_state *idle_states; + struct sched_group_energy *sge; + const struct property *prop; + int sd_level, i, nstates, cpu; + const __be32 *val; + + for_each_possible_cpu(cpu) { + cn = of_get_cpu_node(cpu, NULL); + if (!cn) { + pr_warn("CPU device node missing for CPU %d\n", cpu); + return; + } + + if (!of_find_property(cn, "sched-energy-costs", NULL)) { + pr_warn("CPU device node has no sched-energy-costs\n"); + return; + } + + for_each_possible_sd_level(sd_level) { + cp = of_parse_phandle(cn, "sched-energy-costs", sd_level); + if (!cp) + break; + + prop = of_find_property(cp, "busy-cost-data", NULL); + if (!prop || !prop->value) { + pr_warn("No busy-cost data, skipping sched_energy init\n"); + goto out; + } + + sge = kcalloc(1, sizeof(struct sched_group_energy), + GFP_NOWAIT); + + nstates = (prop->length / sizeof(u32)) / 2; + cap_states = kcalloc(nstates, + sizeof(struct capacity_state), + GFP_NOWAIT); + + for (i = 0, val = prop->value; i < nstates; i++) { + cap_states[i].cap = be32_to_cpup(val++); + cap_states[i].power = be32_to_cpup(val++); + } + + sge->nr_cap_states = nstates; + sge->cap_states = cap_states; + + prop = of_find_property(cp, "idle-cost-data", NULL); + if (!prop || !prop->value) { + pr_warn("No idle-cost data, skipping sched_energy init\n"); + goto out; + } + + nstates = (prop->length / sizeof(u32)); + idle_states = kcalloc(nstates, + sizeof(struct idle_state), + GFP_NOWAIT); + + for (i = 0, val = prop->value; i < nstates; i++) + idle_states[i].power = be32_to_cpup(val++); + + sge->nr_idle_states = nstates; + sge->idle_states = idle_states; + + sge_array[cpu][sd_level] = sge; + } + } + + pr_info("Sched-energy-costs installed from DT\n"); + return; + +out: + free_resources(); +} -- GitLab From 8b35ef456fb467a3023ae8ffcd8e25a729f2abe9 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 13 Jan 2015 14:11:28 +0000 Subject: [PATCH 1123/1442] ANDROID: sched: Compute cpu capacity available at current frequency capacity_orig_of() returns the max available compute capacity of a cpu. For scale-invariant utilization tracking and energy-aware scheduling decisions it is useful to know the compute capacity available at the current OPP of a cpu. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 135be535a4ef..f17279e4dc36 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5104,6 +5104,17 @@ static void record_wakee(struct task_struct *p) } } +/* + * Returns the current capacity of cpu after applying both + * cpu and freq scaling. + */ +static unsigned long capacity_curr_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity_orig * + arch_scale_freq_capacity(NULL, cpu) + >> SCHED_CAPACITY_SHIFT; +} + static inline bool energy_aware(void) { return sched_feat(ENERGY_AWARE); -- GitLab From 9978d1393fc93df3664326fb26c4a1c42469b94a Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Thu, 11 Dec 2014 15:25:29 +0000 Subject: [PATCH 1124/1442] ANDROID: sched: Relocated cpu_util() and change return type Move cpu_util() to an earlier position in fair.c and change return type to unsigned long as negative usage doesn't make much sense. All other load and capacity related functions use unsigned long including the caller of cpu_util(). cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 70 ++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f17279e4dc36..462553703e3e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5115,6 +5115,40 @@ static unsigned long capacity_curr_of(int cpu) >> SCHED_CAPACITY_SHIFT; } +/* + * cpu_util returns the amount of capacity of a CPU that is used by CFS + * tasks. The unit of the return value must be the one of capacity so we can + * compare the utilization with the capacity of the CPU that is available for + * CFS task (ie cpu_capacity). + * + * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the + * recent utilization of currently non-runnable tasks on a CPU. It represents + * the amount of utilization of a CPU in the range [0..capacity_orig] where + * capacity_orig is the cpu_capacity available at the highest frequency + * (arch_scale_freq_capacity()). + * The utilization of a CPU converges towards a sum equal to or less than the + * current capacity (capacity_curr <= capacity_orig) of the CPU because it is + * the running time on this CPU scaled by capacity_curr. + * + * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even + * higher than capacity_orig because of unfortunate rounding in + * cfs.avg.util_avg or just after migrating tasks and new task wakeups until + * the average stabilizes with the new running time. We need to check that the + * utilization stays within the range of [0..capacity_orig] and cap it if + * necessary. Without utilization capping, a group could be seen as overloaded + * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of + * available capacity. We allow utilization to overshoot capacity_curr (but not + * capacity_orig) as it useful for predicting the capacity required after task + * migrations (scheduler-driven DVFS). + */ +static unsigned long cpu_util(int cpu) +{ + unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; + unsigned long capacity = capacity_orig_of(cpu); + + return (util >= capacity) ? capacity : util; +} + static inline bool energy_aware(void) { return sched_feat(ENERGY_AWARE); @@ -5244,8 +5278,6 @@ static inline bool task_fits_max(struct task_struct *p, int cpu) return __task_fits(p, cpu, 0); } -static int cpu_util(int cpu); - static inline bool task_fits_spare(struct task_struct *p, int cpu) { return __task_fits(p, cpu, cpu_util(cpu)); @@ -5643,40 +5675,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } -/* - * cpu_util returns the amount of capacity of a CPU that is used by CFS - * tasks. The unit of the return value must be the one of capacity so we can - * compare the utilization with the capacity of the CPU that is available for - * CFS task (ie cpu_capacity). - * - * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the - * recent utilization of currently non-runnable tasks on a CPU. It represents - * the amount of utilization of a CPU in the range [0..capacity_orig] where - * capacity_orig is the cpu_capacity available at the highest frequency - * (arch_scale_freq_capacity()). - * The utilization of a CPU converges towards a sum equal to or less than the - * current capacity (capacity_curr <= capacity_orig) of the CPU because it is - * the running time on this CPU scaled by capacity_curr. - * - * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even - * higher than capacity_orig because of unfortunate rounding in - * cfs.avg.util_avg or just after migrating tasks and new task wakeups until - * the average stabilizes with the new running time. We need to check that the - * utilization stays within the range of [0..capacity_orig] and cap it if - * necessary. Without utilization capping, a group could be seen as overloaded - * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of - * available capacity. We allow utilization to overshoot capacity_curr (but not - * capacity_orig) as it useful for predicting the capacity required after task - * migrations (scheduler-driven DVFS). - */ -static int cpu_util(int cpu) -{ - unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; - unsigned long capacity = capacity_orig_of(cpu); - - return (util >= capacity) ? capacity : util; -} - /* * select_task_rq_fair: Select target runqueue for the waking task in domains * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, -- GitLab From 30786a0ac3e811b1cd26bb984e7714073f031aae Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Fri, 2 Jan 2015 17:08:52 +0000 Subject: [PATCH 1125/1442] ANDROID: sched: Highest energy aware balancing sched_domain level pointer Add another member to the family of per-cpu sched_domain shortcut pointers. This one, sd_ea, points to the highest level at which energy model is provided. At this level and all levels below all sched_groups have energy model data attached. Partial energy model information is possible but restricted to providing energy model data for lower level sched_domains (sd_ea and below) and leaving load-balancing on levels above to non-energy-aware load-balancing. For example, it is possible to apply energy-aware scheduling within each socket on a multi-socket system and let normal scheduling handle load-balancing between sockets. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/core.c | 10 ++++++++++ kernel/sched/sched.h | 1 + 2 files changed, 11 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ab072a5d8c15..1dca8e45e4bd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5996,11 +5996,13 @@ DEFINE_PER_CPU(int, sd_llc_id); DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); DEFINE_PER_CPU(struct sched_domain *, sd_numa); DEFINE_PER_CPU(struct sched_domain *, sd_asym); +DEFINE_PER_CPU(struct sched_domain *, sd_ea); static void update_top_cache_domain(int cpu) { struct sched_domain_shared *sds = NULL; struct sched_domain *sd; + struct sched_domain *ea_sd = NULL; int id = cpu; int size = 1; @@ -6021,6 +6023,14 @@ static void update_top_cache_domain(int cpu) sd = highest_flag_domain(cpu, SD_ASYM_PACKING); rcu_assign_pointer(per_cpu(sd_asym, cpu), sd); + + for_each_domain(cpu, sd) { + if (sd->groups->sge) + ea_sd = sd; + else + break; + } + rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd); } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index be18b1234d6d..2e0f325c67a3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -885,6 +885,7 @@ DECLARE_PER_CPU(int, sd_llc_id); DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); DECLARE_PER_CPU(struct sched_domain *, sd_numa); DECLARE_PER_CPU(struct sched_domain *, sd_asym); +DECLARE_PER_CPU(struct sched_domain *, sd_ea); struct sched_group_capacity { atomic_t ref; -- GitLab From 61bf6252e5f4cb7e8414c03038e64eda04faea04 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Thu, 18 Dec 2014 14:47:18 +0000 Subject: [PATCH 1126/1442] ANDROID: sched: Calculate energy consumption of sched_group For energy-aware load-balancing decisions it is necessary to know the energy consumption estimates of groups of cpus. This patch introduces a basic function, sched_group_energy(), which estimates the energy consumption of the cpus in the group and any resources shared by the members of the group. NOTE: The function has five levels of identation and breaks the 80 character limit. Refactoring is necessary. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/core.c | 4 ++ kernel/sched/fair.c | 156 +++++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 1 + 3 files changed, 161 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1dca8e45e4bd..b231ed3e6c74 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5997,6 +5997,7 @@ DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); DEFINE_PER_CPU(struct sched_domain *, sd_numa); DEFINE_PER_CPU(struct sched_domain *, sd_asym); DEFINE_PER_CPU(struct sched_domain *, sd_ea); +DEFINE_PER_CPU(struct sched_domain *, sd_scs); static void update_top_cache_domain(int cpu) { @@ -6031,6 +6032,9 @@ static void update_top_cache_domain(int cpu) break; } rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd); + + sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES); + rcu_assign_pointer(per_cpu(sd_scs, cpu), sd); } /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 462553703e3e..2fc5e3077c08 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5154,6 +5154,162 @@ static inline bool energy_aware(void) return sched_feat(ENERGY_AWARE); } +/* + * cpu_norm_util() returns the cpu util relative to a specific capacity, + * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for + * energy calculations. Using the scale-invariant util returned by + * cpu_util() and approximating scale-invariant util by: + * + * util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time + * + * the normalized util can be found using the specific capacity. + * + * capacity = capacity_orig * curr_freq/max_freq + * + * norm_util = running_time/time ~ util/capacity + */ +static unsigned long cpu_norm_util(int cpu, unsigned long capacity) +{ + int util = cpu_util(cpu); + + if (util >= capacity) + return SCHED_CAPACITY_SCALE; + + return (util << SCHED_CAPACITY_SHIFT)/capacity; +} + +static unsigned long group_max_util(struct sched_group *sg) +{ + int i; + unsigned long max_util = 0; + + for_each_cpu(i, sched_group_cpus(sg)) + max_util = max(max_util, cpu_util(i)); + + return max_util; +} + +/* + * group_norm_util() returns the approximated group util relative to it's + * current capacity (busy ratio) in the range [0..SCHED_LOAD_SCALE] for use in + * energy calculations. Since task executions may or may not overlap in time in + * the group the true normalized util is between max(cpu_norm_util(i)) and + * sum(cpu_norm_util(i)) when iterating over all cpus in the group, i. The + * latter is used as the estimate as it leads to a more pessimistic energy + * estimate (more busy). + */ +static unsigned long group_norm_util(struct sched_group *sg, int cap_idx) +{ + int i; + unsigned long util_sum = 0; + unsigned long capacity = sg->sge->cap_states[cap_idx].cap; + + for_each_cpu(i, sched_group_cpus(sg)) + util_sum += cpu_norm_util(i, capacity); + + if (util_sum > SCHED_CAPACITY_SCALE) + return SCHED_CAPACITY_SCALE; + return util_sum; +} + +static int find_new_capacity(struct sched_group *sg, + const struct sched_group_energy const *sge) +{ + int idx; + unsigned long util = group_max_util(sg); + + for (idx = 0; idx < sge->nr_cap_states; idx++) { + if (sge->cap_states[idx].cap >= util) + return idx; + } + + return idx; +} + +/* + * sched_group_energy(): Computes the absolute energy consumption of cpus + * belonging to the sched_group including shared resources shared only by + * members of the group. Iterates over all cpus in the hierarchy below the + * sched_group starting from the bottom working it's way up before going to + * the next cpu until all cpus are covered at all levels. The current + * implementation is likely to gather the same util statistics multiple times. + * This can probably be done in a faster but more complex way. + * Note: sched_group_energy() may fail when racing with sched_domain updates. + */ +static int sched_group_energy(struct sched_group *sg_top) +{ + struct sched_domain *sd; + int cpu, total_energy = 0; + struct cpumask visit_cpus; + struct sched_group *sg; + + WARN_ON(!sg_top->sge); + + cpumask_copy(&visit_cpus, sched_group_cpus(sg_top)); + + while (!cpumask_empty(&visit_cpus)) { + struct sched_group *sg_shared_cap = NULL; + + cpu = cpumask_first(&visit_cpus); + + /* + * Is the group utilization affected by cpus outside this + * sched_group? + */ + sd = rcu_dereference(per_cpu(sd_scs, cpu)); + + if (!sd) + /* + * We most probably raced with hotplug; returning a + * wrong energy estimation is better than entering an + * infinite loop. + */ + return -EINVAL; + + if (sd->parent) + sg_shared_cap = sd->parent->groups; + + for_each_domain(cpu, sd) { + sg = sd->groups; + + /* Has this sched_domain already been visited? */ + if (sd->child && group_first_cpu(sg) != cpu) + break; + + do { + struct sched_group *sg_cap_util; + unsigned long group_util; + int sg_busy_energy, sg_idle_energy, cap_idx; + + if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) + sg_cap_util = sg_shared_cap; + else + sg_cap_util = sg; + + cap_idx = find_new_capacity(sg_cap_util, sg->sge); + group_util = group_norm_util(sg, cap_idx); + sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) + >> SCHED_CAPACITY_SHIFT; + sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) * sg->sge->idle_states[0].power) + >> SCHED_CAPACITY_SHIFT; + + total_energy += sg_busy_energy + sg_idle_energy; + + if (!sd->child) + cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); + + if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top))) + goto next_cpu; + + } while (sg = sg->next, sg != sd->groups); + } +next_cpu: + continue; + } + + return total_energy; +} + /* * Detect M:N waker/wakee relationships via a switching-frequency heuristic. * diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 2e0f325c67a3..92d52b6398ac 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -886,6 +886,7 @@ DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); DECLARE_PER_CPU(struct sched_domain *, sd_numa); DECLARE_PER_CPU(struct sched_domain *, sd_asym); DECLARE_PER_CPU(struct sched_domain *, sd_ea); +DECLARE_PER_CPU(struct sched_domain *, sd_scs); struct sched_group_capacity { atomic_t ref; -- GitLab From a455fa7b08faffe857d82dd589cea5fc66407210 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Fri, 2 Jan 2015 14:21:56 +0000 Subject: [PATCH 1127/1442] ANDROID: sched: Extend sched_group_energy to test load-balancing decisions Extended sched_group_energy() to support energy prediction with usage (tasks) added/removed from a specific cpu or migrated between a pair of cpus. Useful for load-balancing decision making. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 90 +++++++++++++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 27 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2fc5e3077c08..a9670e37673e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5141,12 +5141,21 @@ static unsigned long capacity_curr_of(int cpu) * capacity_orig) as it useful for predicting the capacity required after task * migrations (scheduler-driven DVFS). */ -static unsigned long cpu_util(int cpu) +static unsigned long __cpu_util(int cpu, int delta) { unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; unsigned long capacity = capacity_orig_of(cpu); - return (util >= capacity) ? capacity : util; + delta += util; + if (delta < 0) + return 0; + + return (delta >= capacity) ? capacity : delta; +} + +static unsigned long cpu_util(int cpu) +{ + return __cpu_util(cpu, 0); } static inline bool energy_aware(void) @@ -5154,8 +5163,18 @@ static inline bool energy_aware(void) return sched_feat(ENERGY_AWARE); } +struct energy_env { + struct sched_group *sg_top; + struct sched_group *sg_cap; + int cap_idx; + int util_delta; + int src_cpu; + int dst_cpu; + int energy; +}; + /* - * cpu_norm_util() returns the cpu util relative to a specific capacity, + * __cpu_norm_util() returns the cpu util relative to a specific capacity, * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for * energy calculations. Using the scale-invariant util returned by * cpu_util() and approximating scale-invariant util by: @@ -5168,9 +5187,9 @@ static inline bool energy_aware(void) * * norm_util = running_time/time ~ util/capacity */ -static unsigned long cpu_norm_util(int cpu, unsigned long capacity) +static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta) { - int util = cpu_util(cpu); + int util = __cpu_util(cpu, delta); if (util >= capacity) return SCHED_CAPACITY_SCALE; @@ -5178,13 +5197,25 @@ static unsigned long cpu_norm_util(int cpu, unsigned long capacity) return (util << SCHED_CAPACITY_SHIFT)/capacity; } -static unsigned long group_max_util(struct sched_group *sg) +static int calc_util_delta(struct energy_env *eenv, int cpu) { - int i; + if (cpu == eenv->src_cpu) + return -eenv->util_delta; + if (cpu == eenv->dst_cpu) + return eenv->util_delta; + return 0; +} + +static +unsigned long group_max_util(struct energy_env *eenv) +{ + int i, delta; unsigned long max_util = 0; - for_each_cpu(i, sched_group_cpus(sg)) - max_util = max(max_util, cpu_util(i)); + for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) { + delta = calc_util_delta(eenv, i); + max_util = max(max_util, __cpu_util(i, delta)); + } return max_util; } @@ -5198,31 +5229,36 @@ static unsigned long group_max_util(struct sched_group *sg) * latter is used as the estimate as it leads to a more pessimistic energy * estimate (more busy). */ -static unsigned long group_norm_util(struct sched_group *sg, int cap_idx) +static unsigned +long group_norm_util(struct energy_env *eenv, struct sched_group *sg) { - int i; + int i, delta; unsigned long util_sum = 0; - unsigned long capacity = sg->sge->cap_states[cap_idx].cap; + unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap; - for_each_cpu(i, sched_group_cpus(sg)) - util_sum += cpu_norm_util(i, capacity); + for_each_cpu(i, sched_group_cpus(sg)) { + delta = calc_util_delta(eenv, i); + util_sum += __cpu_norm_util(i, capacity, delta); + } if (util_sum > SCHED_CAPACITY_SCALE) return SCHED_CAPACITY_SCALE; return util_sum; } -static int find_new_capacity(struct sched_group *sg, +static int find_new_capacity(struct energy_env *eenv, const struct sched_group_energy const *sge) { int idx; - unsigned long util = group_max_util(sg); + unsigned long util = group_max_util(eenv); for (idx = 0; idx < sge->nr_cap_states; idx++) { if (sge->cap_states[idx].cap >= util) - return idx; + break; } + eenv->cap_idx = idx; + return idx; } @@ -5236,16 +5272,16 @@ static int find_new_capacity(struct sched_group *sg, * This can probably be done in a faster but more complex way. * Note: sched_group_energy() may fail when racing with sched_domain updates. */ -static int sched_group_energy(struct sched_group *sg_top) +static int sched_group_energy(struct energy_env *eenv) { struct sched_domain *sd; int cpu, total_energy = 0; struct cpumask visit_cpus; struct sched_group *sg; - WARN_ON(!sg_top->sge); + WARN_ON(!eenv->sg_top->sge); - cpumask_copy(&visit_cpus, sched_group_cpus(sg_top)); + cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top)); while (!cpumask_empty(&visit_cpus)) { struct sched_group *sg_shared_cap = NULL; @@ -5277,17 +5313,16 @@ static int sched_group_energy(struct sched_group *sg_top) break; do { - struct sched_group *sg_cap_util; unsigned long group_util; int sg_busy_energy, sg_idle_energy, cap_idx; if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) - sg_cap_util = sg_shared_cap; + eenv->sg_cap = sg_shared_cap; else - sg_cap_util = sg; + eenv->sg_cap = sg; - cap_idx = find_new_capacity(sg_cap_util, sg->sge); - group_util = group_norm_util(sg, cap_idx); + cap_idx = find_new_capacity(eenv, sg->sge); + group_util = group_norm_util(eenv, sg); sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) >> SCHED_CAPACITY_SHIFT; sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) * sg->sge->idle_states[0].power) @@ -5298,7 +5333,7 @@ static int sched_group_energy(struct sched_group *sg_top) if (!sd->child) cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); - if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top))) + if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top))) goto next_cpu; } while (sg = sg->next, sg != sd->groups); @@ -5307,7 +5342,8 @@ static int sched_group_energy(struct sched_group *sg_top) continue; } - return total_energy; + eenv->energy = total_energy; + return 0; } /* -- GitLab From 931bd8235365661c5733b279e0cb53fda830ae89 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 6 Jan 2015 17:34:05 +0000 Subject: [PATCH 1128/1442] ANDROID: sched: Estimate energy impact of scheduling decisions Adds a generic energy-aware helper function, energy_diff(), that calculates energy impact of adding, removing, and migrating utilization in the system. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a9670e37673e..3f90ec9dacee 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5346,6 +5346,58 @@ static int sched_group_energy(struct energy_env *eenv) return 0; } +static inline bool cpu_in_sg(struct sched_group *sg, int cpu) +{ + return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg)); +} + +/* + * energy_diff(): Estimate the energy impact of changing the utilization + * distribution. eenv specifies the change: utilisation amount, source, and + * destination cpu. Source or destination cpu may be -1 in which case the + * utilization is removed from or added to the system (e.g. task wake-up). If + * both are specified, the utilization is migrated. + */ +static int energy_diff(struct energy_env *eenv) +{ + struct sched_domain *sd; + struct sched_group *sg; + int sd_cpu = -1, energy_before = 0, energy_after = 0; + + struct energy_env eenv_before = { + .util_delta = 0, + .src_cpu = eenv->src_cpu, + .dst_cpu = eenv->dst_cpu, + }; + + if (eenv->src_cpu == eenv->dst_cpu) + return 0; + + sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu; + sd = rcu_dereference(per_cpu(sd_ea, sd_cpu)); + + if (!sd) + return 0; /* Error */ + + sg = sd->groups; + + do { + if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) { + eenv_before.sg_top = eenv->sg_top = sg; + + if (sched_group_energy(&eenv_before)) + return 0; /* Invalid result abort */ + energy_before += eenv_before.energy; + + if (sched_group_energy(eenv)) + return 0; /* Invalid result abort */ + energy_after += eenv->energy; + } + } while (sg = sg->next, sg != sd->groups); + + return energy_after-energy_before; +} + /* * Detect M:N waker/wakee relationships via a switching-frequency heuristic. * -- GitLab From a562dfc4f79525c72df1e10e4a689b41bbed894e Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Sat, 9 May 2015 16:49:57 +0100 Subject: [PATCH 1129/1442] ANDROID: sched: Add over-utilization/tipping point indicator Energy-aware scheduling is only meant to be active while the system is _not_ over-utilized. That is, there are spare cycles available to shift tasks around based on their actual utilization to get a more energy-efficient task distribution without depriving any tasks. When above the tipping point task placement is done the traditional way based on load_avg, spreading the tasks across as many cpus as possible based on priority scaled load to preserve smp_nice. Below the tipping point we want to use util_avg instead. We need to define a criteria for when we make the switch. The util_avg for each cpu converges towards 100% (1024) regardless of how many task additional task we may put on it. If we define over-utilized as: sum_{cpus}(rq.cfs.avg.util_avg) + margin > sum_{cpus}(rq.capacity) some individual cpus may be over-utilized running multiple tasks even when the above condition is false. That should be okay as long as we try to spread the tasks out to avoid per-cpu over-utilization as much as possible and if all tasks have the _same_ priority. If the latter isn't true, we have to consider priority to preserve smp_nice. For example, we could have n_cpus nice=-10 util_avg=55% tasks and n_cpus/2 nice=0 util_avg=60% tasks. Balancing based on util_avg we are likely to end up with nice=-10 tasks sharing cpus and nice=0 tasks getting their own as we 1.5*n_cpus tasks in total and 55%+55% is less over-utilized than 55%+60% for those cpus that have to be shared. The system utilization is only 85% of the system capacity, but we are breaking smp_nice. To be sure not to break smp_nice, we have defined over-utilization conservatively as when any cpu in the system is fully utilized at it's highest frequency instead: cpu_rq(any).cfs.avg.util_avg + margin > cpu_rq(any).capacity IOW, as soon as one cpu is (nearly) 100% utilized, we switch to load_avg to factor in priority to preserve smp_nice. With this definition, we can skip periodic load-balance as no cpu has an always-running task when the system is not over-utilized. All tasks will be periodic and we can balance them at wake-up. This conservative condition does however mean that some scenarios that could benefit from energy-aware decisions even if one cpu is fully utilized would not get those benefits. For system where some cpus might have reduced capacity on some cpus (RT-pressure and/or big.LITTLE), we want periodic load-balance checks as soon a just a single cpu is fully utilized as it might one of those with reduced capacity and in that case we want to migrate it. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 31 +++++++++++++++++++++++++------ kernel/sched/sched.h | 3 +++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3f90ec9dacee..17de0b1c4d69 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4528,6 +4528,8 @@ static inline void hrtick_update(struct rq *rq) } #endif +static bool cpu_overutilized(int cpu); + /* * The enqueue_task method is called before nr_running is * increased. Here we update the fair scheduling stats and @@ -4538,6 +4540,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + int task_new = !(flags & ENQUEUE_WAKEUP); /* * If in_iowait is set, the code below may not trigger any cpufreq @@ -4577,9 +4580,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) update_cfs_shares(cfs_rq); } - if (!se) + if (!se) { add_nr_running(rq, 1); - + if (!task_new && !rq->rd->overutilized && + cpu_overutilized(rq->cpu)) + rq->rd->overutilized = true; + } hrtick_update(rq); } @@ -7333,11 +7339,12 @@ group_type group_classify(struct sched_group *group, * @local_group: Does group contain this_cpu. * @sgs: variable to hold the statistics for this group. * @overload: Indicate more than one runnable task for any CPU. + * @overutilized: Indicate overutilization for any CPU. */ static inline void update_sg_lb_stats(struct lb_env *env, struct sched_group *group, int load_idx, int local_group, struct sg_lb_stats *sgs, - bool *overload) + bool *overload, bool *overutilized) { unsigned long load; int i, nr_running; @@ -7371,6 +7378,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, */ if (!nr_running && idle_cpu(i)) sgs->idle_cpus++; + + if (cpu_overutilized(i)) + *overutilized = true; } /* Adjust by relative CPU capacity of the group */ @@ -7480,7 +7490,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd struct sched_group *sg = env->sd->groups; struct sg_lb_stats tmp_sgs; int load_idx, prefer_sibling = 0; - bool overload = false; + bool overload = false, overutilized = false; if (child && child->flags & SD_PREFER_SIBLING) prefer_sibling = 1; @@ -7502,7 +7512,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd } update_sg_lb_stats(env, sg, load_idx, local_group, sgs, - &overload); + &overload, &overutilized); if (local_group) goto next_group; @@ -7546,8 +7556,14 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* update overload indicator if we are at root domain */ if (env->dst_rq->rd->overload != overload) env->dst_rq->rd->overload = overload; - } + /* Update over-utilization (tipping point, U >= 0) indicator */ + if (env->dst_rq->rd->overutilized != overutilized) + env->dst_rq->rd->overutilized = overutilized; + } else { + if (!env->dst_rq->rd->overutilized && overutilized) + env->dst_rq->rd->overutilized = true; + } } /** @@ -8921,6 +8937,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) if (static_branch_unlikely(&sched_numa_balancing)) task_tick_numa(rq, curr); + + if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) + rq->rd->overutilized = true; } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 92d52b6398ac..690bfb6a89ea 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -557,6 +557,9 @@ struct root_domain { /* Indicate more than one runnable task for any CPU */ bool overload; + /* Indicate one or more cpus over-utilized (tipping point) */ + bool overutilized; + /* * The bit corresponding to a CPU gets set here if such CPU has more * than one runnable -deadline task (as it is below for RT tasks). -- GitLab From 06910641350c6bb0c88e5c76db9e532a4114e1d4 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 27 Jan 2015 13:48:07 +0000 Subject: [PATCH 1130/1442] ANDROID: sched, cpuidle: Track cpuidle state index in the scheduler The idle-state of each cpu is currently pointed to by rq->idle_state but there isn't any information in the struct cpuidle_state that can used to look up the idle-state energy model data stored in struct sched_group_energy. For this purpose is necessary to store the idle state index as well. Ideally, the idle-state data should be unified. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- drivers/cpuidle/cpuidle.c | 4 ++-- include/linux/cpuidle.h | 2 +- kernel/sched/idle.c | 3 ++- kernel/sched/sched.h | 21 +++++++++++++++++++++ 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index c73207abb5a4..78ab946d946a 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -192,7 +192,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, } /* Take note of the planned idle state. */ - sched_idle_set_state(target_state); + sched_idle_set_state(target_state, index); trace_cpu_idle_rcuidle(index, dev->cpu); time_start = ns_to_ktime(local_clock()); @@ -205,7 +205,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* The cpu is no longer idle or about to enter idle. */ - sched_idle_set_state(NULL); + sched_idle_set_state(NULL, -1); if (broadcast) { if (WARN_ON_ONCE(!irqs_disabled())) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bb31373c3478..9a8eec9e59b2 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -207,7 +207,7 @@ static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv, #endif /* kernel/sched/idle.c */ -extern void sched_idle_set_state(struct cpuidle_state *idle_state); +extern void sched_idle_set_state(struct cpuidle_state *idle_state, int index); extern void default_idle_call(void); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 1d8718d5300d..cf75f00f7037 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -23,9 +23,10 @@ extern char __cpuidle_text_start[], __cpuidle_text_end[]; * sched_idle_set_state - Record idle state for the current CPU. * @idle_state: State to record. */ -void sched_idle_set_state(struct cpuidle_state *idle_state) +void sched_idle_set_state(struct cpuidle_state *idle_state, int index) { idle_set_state(this_rq(), idle_state); + idle_set_state_idx(this_rq(), index); } static int __read_mostly cpu_idle_force_poll; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 690bfb6a89ea..86512afe0711 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -722,6 +722,7 @@ struct rq { #ifdef CONFIG_CPU_IDLE /* Must be inspected within a rcu lock section */ struct cpuidle_state *idle_state; + int idle_state_idx; #endif }; @@ -1329,6 +1330,17 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq) SCHED_WARN_ON(!rcu_read_lock_held()); return rq->idle_state; } + +static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx) +{ + rq->idle_state_idx = idle_state_idx; +} + +static inline int idle_get_state_idx(struct rq *rq) +{ + WARN_ON(!rcu_read_lock_held()); + return rq->idle_state_idx; +} #else static inline void idle_set_state(struct rq *rq, struct cpuidle_state *idle_state) @@ -1339,6 +1351,15 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq) { return NULL; } + +static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx) +{ +} + +static inline int idle_get_state_idx(struct rq *rq) +{ + return -1; +} #endif extern void sysrq_sched_debug_show(void); -- GitLab From 1f884f4351d7e44ba59e2119555f9c5f9dcd0b8e Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 27 Jan 2015 14:04:17 +0000 Subject: [PATCH 1131/1442] ANDROID: sched: Determine the current sched_group idle-state To estimate the energy consumption of a sched_group in sched_group_energy() it is necessary to know which idle-state the group is in when it is idle. For now, it is assumed that this is the current idle-state (though it might be wrong). Based on the individual cpu idle-states group_idle_state() finds the group idle-state. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 17de0b1c4d69..74549aba2efb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5268,6 +5268,20 @@ static int find_new_capacity(struct energy_env *eenv, return idx; } +static int group_idle_state(struct sched_group *sg) +{ + int i, state = INT_MAX; + + /* Find the shallowest idle state in the sched group. */ + for_each_cpu(i, sched_group_cpus(sg)) + state = min(state, idle_get_state_idx(cpu_rq(i))); + + /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ + state++; + + return state; +} + /* * sched_group_energy(): Computes the absolute energy consumption of cpus * belonging to the sched_group including shared resources shared only by @@ -5320,7 +5334,8 @@ static int sched_group_energy(struct energy_env *eenv) do { unsigned long group_util; - int sg_busy_energy, sg_idle_energy, cap_idx; + int sg_busy_energy, sg_idle_energy; + int cap_idx, idle_idx; if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) eenv->sg_cap = sg_shared_cap; @@ -5328,11 +5343,13 @@ static int sched_group_energy(struct energy_env *eenv) eenv->sg_cap = sg; cap_idx = find_new_capacity(eenv, sg->sge); + idle_idx = group_idle_state(sg); group_util = group_norm_util(eenv, sg); sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) - >> SCHED_CAPACITY_SHIFT; - sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) * sg->sge->idle_states[0].power) - >> SCHED_CAPACITY_SHIFT; + >> SCHED_CAPACITY_SHIFT; + sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util) + * sg->sge->idle_states[idle_idx].power) + >> SCHED_CAPACITY_SHIFT; total_energy += sg_busy_energy + sg_idle_energy; -- GitLab From 4017a8e35c58520e2f2f1360561c1833b1611fad Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Sat, 9 May 2015 20:03:19 +0100 Subject: [PATCH 1132/1442] ANDROID: sched: Energy-aware wake-up task placement Let available compute capacity and estimated energy impact select wake-up target cpu when energy-aware scheduling is enabled and the system in not over-utilized (above the tipping point). energy_aware_wake_cpu() attempts to find group of cpus with sufficient compute capacity to accommodate the task and find a cpu with enough spare capacity to handle the task within that group. Preference is given to cpus with enough spare capacity at the current OPP. Finally, the energy impact of the new target and the previous task cpu is compared to select the wake-up target cpu. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 89 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 74549aba2efb..4456dbf71299 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5942,6 +5942,86 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } +static int energy_aware_wake_cpu(struct task_struct *p, int target) +{ + struct sched_domain *sd; + struct sched_group *sg, *sg_target; + int target_max_cap = INT_MAX; + int target_cpu = task_cpu(p); + int i; + + sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p))); + + if (!sd) + return target; + + sg = sd->groups; + sg_target = sg; + + /* + * Find group with sufficient capacity. We only get here if no cpu is + * overutilized. We may end up overutilizing a cpu by adding the task, + * but that should not be any worse than select_idle_sibling(). + * load_balance() should sort it out later as we get above the tipping + * point. + */ + do { + /* Assuming all cpus are the same in group */ + int max_cap_cpu = group_first_cpu(sg); + + /* + * Assume smaller max capacity means more energy-efficient. + * Ideally we should query the energy model for the right + * answer but it easily ends up in an exhaustive search. + */ + if (capacity_of(max_cap_cpu) < target_max_cap && + task_fits_max(p, max_cap_cpu)) { + sg_target = sg; + target_max_cap = capacity_of(max_cap_cpu); + } + } while (sg = sg->next, sg != sd->groups); + + /* Find cpu with sufficient capacity */ + for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) { + /* + * p's blocked utilization is still accounted for on prev_cpu + * so prev_cpu will receive a negative bias due to the double + * accounting. However, the blocked utilization may be zero. + */ + int new_util = cpu_util(i) + task_util(p); + + if (new_util > capacity_orig_of(i)) + continue; + + if (new_util < capacity_curr_of(i)) { + target_cpu = i; + if (cpu_rq(i)->nr_running) + break; + } + + /* cpu has capacity at higher OPP, keep it as fallback */ + if (target_cpu == task_cpu(p)) + target_cpu = i; + } + + if (target_cpu != task_cpu(p)) { + struct energy_env eenv = { + .util_delta = task_util(p), + .src_cpu = task_cpu(p), + .dst_cpu = target_cpu, + }; + + /* Not enough spare capacity on previous cpu */ + if (cpu_overutilized(task_cpu(p))) + return target_cpu; + + if (energy_diff(&eenv) >= 0) + return task_cpu(p); + } + + return target_cpu; +} + /* * select_task_rq_fair: Select target runqueue for the waking task in domains * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, @@ -5965,8 +6045,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); - want_affine = !wake_wide(p) && task_fits_max(p, cpu) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + want_affine = (!wake_wide(p) && task_fits_max(p, cpu) && + cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) || + energy_aware(); } rcu_read_lock(); @@ -5997,7 +6078,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f } if (!sd) { - if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */ + if (energy_aware() && !cpu_rq(cpu)->rd->overutilized) + new_cpu = energy_aware_wake_cpu(p, prev_cpu); + else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); } else while (sd) { -- GitLab From 53065e82a18753d2cb2e69637002c542a88918a0 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Sun, 10 May 2015 15:17:32 +0100 Subject: [PATCH 1133/1442] ANDROID: sched: Consider a not over-utilized energy-aware system as balanced In case the system operates below the tipping point indicator, introduced in ("sched: Add over-utilization/tipping point indicator"), bail out in find_busiest_group after the dst and src group statistics have been checked. There is simply no need to move usage around because all involved cpus still have spare cycles available. For an energy-aware system below its tipping point, we rely on the task placement of the wakeup path. This works well for short running tasks. The existence of long running tasks on one of the involved cpus lets the system operate over its tipping point. To be able to move such a task (whose load can't be used to average the load among the cpus) from a src cpu with lower capacity than the dst_cpu, an additional rule has to be implemented in need_active_balance. Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4456dbf71299..21ef486d5c57 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7882,6 +7882,10 @@ static struct sched_group *find_busiest_group(struct lb_env *env) * this level. */ update_sd_lb_stats(env, &sds); + + if (energy_aware() && !env->dst_rq->rd->overutilized) + goto out_balanced; + local = &sds.local_stat; busiest = &sds.busiest_stat; -- GitLab From f69e2dc550fa45c3008545e5a2e2bfd852ba0526 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 3 Feb 2015 13:54:11 +0000 Subject: [PATCH 1134/1442] ANDROID: sched: Disable energy-unfriendly nohz kicks With energy-aware scheduling enabled nohz_kick_needed() generates many nohz idle-balance kicks which lead to nothing when multiple tasks get packed on a single cpu to save energy. This causes unnecessary wake-ups and hence wastes energy. Make these conditions depend on !energy_aware() for now until the energy-aware nohz story gets sorted out. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 21ef486d5c57..2672e7e6934a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8928,12 +8928,13 @@ static inline bool nohz_kick_needed(struct rq *rq) if (time_before(now, nohz.next_balance)) return false; - if (rq->nr_running >= 2) + if (rq->nr_running >= 2 && + (!energy_aware() || cpu_overutilized(cpu))) return true; rcu_read_lock(); sds = rcu_dereference(per_cpu(sd_llc_shared, cpu)); - if (sds) { + if (sds && !energy_aware()) { /* * XXX: write a coherent comment on why we do this. * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com -- GitLab From 4eb92977b24f39bb37b0138b2ee51a23ed36aa72 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 22 Sep 2015 16:47:48 +0100 Subject: [PATCH 1135/1442] ANDROID: cpufreq: Max freq invariant scheduler load-tracking and cpu capacity support Implements cpufreq_scale_max_freq_capacity() to provide the scheduler with a maximum frequency scaling correction factor for more accurate load-tracking and cpu capacity handling by being able to deal with frequency capping. This scaling factor describes the influence of running a cpu with a current maximum frequency lower than the absolute possible maximum frequency on load tracking and cpu capacity. The factor is: current_max_freq(cpu) << SCHED_CAPACITY_SHIFT / max_freq(cpu) In fact, max_freq_scale should be a struct cpufreq_policy data member. But this would require that the scheduler hot path (__update_load_avg()) would have to grab the cpufreq lock. This can be avoided by using per-cpu data initialized to SCHED_CAPACITY_SCALE for max_freq_scale. Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- drivers/cpufreq/cpufreq.c | 19 +++++++++++++++++++ include/linux/cpufreq.h | 1 + 2 files changed, 20 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6f2dae7197a2..cbda4f836c8b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -306,12 +306,14 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) *********************************************************************/ static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; +static DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE; static void scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) { unsigned long cur = freqs ? freqs->new : policy->cur; unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max; + struct cpufreq_cpuinfo *cpuinfo = &policy->cpuinfo; int cpu; pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n", @@ -319,6 +321,18 @@ scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) for_each_cpu(cpu, policy->cpus) per_cpu(freq_scale, cpu) = scale; + + if (freqs) + return; + + scale = (policy->max << SCHED_CAPACITY_SHIFT) / cpuinfo->max_freq; + + pr_debug("cpus %*pbl cur max/max freq %u/%u kHz max freq scale %lu\n", + cpumask_pr_args(policy->cpus), policy->max, cpuinfo->max_freq, + scale); + + for_each_cpu(cpu, policy->cpus) + per_cpu(max_freq_scale, cpu) = scale; } unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu) @@ -326,6 +340,11 @@ unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu) return per_cpu(freq_scale, cpu); } +unsigned long cpufreq_scale_max_freq_capacity(int cpu) +{ + return per_cpu(max_freq_scale, cpu); +} + static void __cpufreq_notify_transition(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs, unsigned int state) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0bb2d1258869..ebdc361e63bc 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -889,4 +889,5 @@ int cpufreq_generic_init(struct cpufreq_policy *policy, struct sched_domain; unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu); +unsigned long cpufreq_scale_max_freq_capacity(int cpu); #endif /* _LINUX_CPUFREQ_H */ -- GitLab From bbb138bdace76672ae8919887419e9f5f96bbeab Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Sat, 26 Sep 2015 18:19:54 +0100 Subject: [PATCH 1136/1442] ANDROID: sched: Update max cpu capacity in case of max frequency constraints Wakeup balancing uses cpu capacity awareness and needs to know the system-wide maximum cpu capacity. Patch "sched: Store system-wide maximum cpu capacity in root domain" finds the system-wide maximum cpu capacity during scheduler domain hierarchy setup. This is sufficient as long as maximum frequency invariance is not enabled. If it is enabled, the system-wide maximum cpu capacity can change between scheduler domain hierarchy setups due to frequency capping. The cpu capacity is changed in update_cpu_capacity() which is called in load balance on the lowest scheduler domain hierarchy level. To be able to know if a change in cpu capacity for a certain cpu also has an effect on the system-wide maximum cpu capacity it is normally necessary to iterate over all cpus. This would be way too costly. That's why this patch follows a different approach. The unsigned long max_cpu_capacity value in struct root_domain is replaced with a struct max_cpu_capacity, containing value (the max_cpu_capacity) and cpu (the cpu index of the cpu providing the maximum cpu_capacity). Changes to the system-wide maximum cpu capacity and the cpu index are made if: 1 System-wide maximum cpu capacity < cpu capacity 2 System-wide maximum cpu capacity > cpu capacity and cpu index == cpu There are no changes to the system-wide maximum cpu capacity in all other cases. Atomic read and write access to the pair (max_cpu_capacity.val, max_cpu_capacity.cpu) is enforced by max_cpu_capacity.lock. The access to max_cpu_capacity.val in task_fits_max() is still performed without taking the max_cpu_capacity.lock. The code to set max cpu capacity in build_sched_domains() has been removed because the whole functionality is now provided by update_cpu_capacity() instead. This approach can introduce errors temporarily, e.g. in case the cpu currently providing the max cpu capacity has its cpu capacity lowered due to frequency capping and calls update_cpu_capacity() before any cpu which might provide the max cpu now. There is also an outstanding question: Should the cpu capacity of a cpu going idle be set to a very small value? Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- kernel/sched/core.c | 12 ++---------- kernel/sched/fair.c | 32 +++++++++++++++++++++++++++++++- kernel/sched/sched.h | 11 ++++++++++- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b231ed3e6c74..15a8e35f0198 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5885,6 +5885,8 @@ static int init_rootdomain(struct root_domain *rd) if (cpupri_init(&rd->cpupri) != 0) goto free_rto_mask; + + init_max_cpu_capacity(&rd->max_cpu_capacity); return 0; free_rto_mask: @@ -7131,20 +7133,10 @@ static int build_sched_domains(const struct cpumask *cpu_map, for_each_cpu(i, cpu_map) { rq = cpu_rq(i); sd = *per_cpu_ptr(d.sd, i); - - /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */ - if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity)) - WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig); - cpu_attach_domain(sd, d.rd, i); } rcu_read_unlock(); - if (rq && sched_debug_enabled) { - pr_info("span: %*pbl (max cpu_capacity = %lu)\n", - cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); - } - ret = 0; error: __free_domain_allocs(&d, alloc_state, cpu_map); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2672e7e6934a..bfd407869ef0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5534,7 +5534,7 @@ static inline bool __task_fits(struct task_struct *p, int cpu, int util) static inline bool task_fits_max(struct task_struct *p, int cpu) { unsigned long capacity = capacity_of(cpu); - unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity; + unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val; if (capacity == max_capacity) return true; @@ -7246,13 +7246,43 @@ static unsigned long scale_rt_capacity(int cpu) return 1; } +void init_max_cpu_capacity(struct max_cpu_capacity *mcc) +{ + raw_spin_lock_init(&mcc->lock); + mcc->val = 0; + mcc->cpu = -1; +} + static void update_cpu_capacity(struct sched_domain *sd, int cpu) { unsigned long capacity = arch_scale_cpu_capacity(sd, cpu); struct sched_group *sdg = sd->groups; + struct max_cpu_capacity *mcc; + unsigned long max_capacity; + int max_cap_cpu; + unsigned long flags; cpu_rq(cpu)->cpu_capacity_orig = capacity; + mcc = &cpu_rq(cpu)->rd->max_cpu_capacity; + + raw_spin_lock_irqsave(&mcc->lock, flags); + max_capacity = mcc->val; + max_cap_cpu = mcc->cpu; + + if ((max_capacity > capacity && max_cap_cpu == cpu) || + (max_capacity < capacity)) { + mcc->val = capacity; + mcc->cpu = cpu; +#ifdef CONFIG_SCHED_DEBUG + raw_spin_unlock_irqrestore(&mcc->lock, flags); + pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity); + goto skip_unlock; +#endif + } + raw_spin_unlock_irqrestore(&mcc->lock, flags); + +skip_unlock: __attribute__ ((unused)); capacity *= scale_rt_capacity(cpu); capacity >>= SCHED_CAPACITY_SHIFT; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 86512afe0711..d08f3b054194 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -539,6 +539,12 @@ struct dl_rq { #ifdef CONFIG_SMP +struct max_cpu_capacity { + raw_spinlock_t lock; + unsigned long val; + int cpu; +}; + /* * We add the notion of a root-domain which will be used to define per-domain * variables. Each exclusive cpuset essentially defines an island domain by @@ -576,7 +582,8 @@ struct root_domain { cpumask_var_t rto_mask; struct cpupri cpupri; - unsigned long max_cpu_capacity; + /* Maximum cpu capacity in the system. */ + struct max_cpu_capacity max_cpu_capacity; }; extern struct root_domain def_root_domain; @@ -1414,6 +1421,8 @@ static inline void sched_update_tick_dependency(struct rq *rq) static inline void sched_update_tick_dependency(struct rq *rq) { } #endif +extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc); + static inline void add_nr_running(struct rq *rq, unsigned count) { unsigned prev_nr = rq->nr_running; -- GitLab From 568913e203a12178620e5619606e54375fd690fa Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Wed, 23 Sep 2015 17:59:55 +0100 Subject: [PATCH 1137/1442] ANDROID: arm: Enable max freq invariant scheduler load-tracking and capacity support Maximum Frequency Invariance has to be part of Cpu Invariance because Frequency Invariance deals only with differences in load-tracking introduces by Dynamic Frequency Scaling and not with limiting the possible range of cpu frequency. By placing Maximum Frequency Invariance into Cpu Invariance, load-tracking is scaled via arch_scale_cpu_capacity() in __update_load_avg() and cpu capacity is scaled via arch_scale_cpu_capacity() in update_cpu_capacity(). To be able to save the extra multiplication in the scheduler hotpath (__update_load_avg()) we could: 1 Inform cpufreq about base cpu capacity at boot and let it handle scale_cpu_capacity() as well. 2 Use the cpufreq policy callback which would update a per-cpu current cpu_scale and this value would be return in scale_cpu_capacity(). 3 Use per-cpu current max_freq_scale and current cpu_scale with the current patch. Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- arch/arm/kernel/topology.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index b2cefdf56536..083f90fd009f 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -44,7 +44,13 @@ static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) { +#if CONFIG_CPU_FREQ + unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu); + + return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT; +#else return per_cpu(cpu_scale, cpu); +#endif } static void set_capacity_scale(unsigned int cpu, unsigned long capacity) -- GitLab From 8f4855421f616a186fa6bbfccaaf8a1056fa9df1 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 25 Sep 2015 17:34:15 +0100 Subject: [PATCH 1138/1442] ANDROID: arm64: Enable max freq invariant scheduler load-tracking and capacity support Maximum Frequency Invariance has to be part of Cpu Invariance because Frequency Invariance deals only with differences in load-tracking introduces by Dynamic Frequency Scaling and not with limiting the possible range of cpu frequency. By placing Maximum Frequency Invariance into Cpu Invariance, load-tracking is scaled via arch_scale_cpu_capacity() in __update_load_avg() and cpu capacity is scaled via arch_scale_cpu_capacity() in update_cpu_capacity(). To be able to save the extra multiplication in the scheduler hotpath (__update_load_avg()) we could: 1 Inform cpufreq about base cpu capacity at boot and let it handle scale_cpu_capacity() as well. 2 Use the cpufreq policy callback which would update a per-cpu current cpu_scale and this value would be return in scale_cpu_capacity(). 3 Use per-cpu current max_freq_scale and current cpu_scale with the current patch. Including in topology.h like for the arm arch doesn't work because of CONFIG_COMPAT=y (Kernel support for 32-bit EL0). That's why cpufreq_scale_max_freq_capacity() has to be declared extern in topology.h. Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- arch/arm64/include/asm/topology.h | 1 + arch/arm64/kernel/topology.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 237eaa0d5cf7..7ec84d0191c8 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -35,6 +35,7 @@ struct sched_domain; #ifdef CONFIG_CPU_FREQ #define arch_scale_freq_capacity cpufreq_scale_freq_capacity extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu); +extern unsigned long cpufreq_scale_max_freq_capacity(int cpu); #endif #define arch_scale_cpu_capacity scale_cpu_capacity extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index b5b43af6a7dc..5b2c67a510d8 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -29,7 +29,13 @@ static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) { +#ifdef CONFIG_CPU_FREQ + unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu); + + return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT; +#else return per_cpu(cpu_scale, cpu); +#endif } static void set_capacity_scale(unsigned int cpu, unsigned long capacity) -- GitLab From 785367fc844d1f0d09d512e9d7df6e32ee340808 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Wed, 13 Jan 2016 15:49:44 +0000 Subject: [PATCH 1139/1442] ANDROID: sched: Do eas idle balance regardless of the rq avg idle value EAS relies on idle balance to migrate a misfit task towards a cpu with higher capacity. When such a cpu becomes idle, idle balance should happen even if the rq avg idle is smaller than the sched migration cost (default 500us). The rq avg idle is updated during the wakeup of a task in case the rq has a non-null idle_stamp. This value stays unchanged and valid until the next task wakes up on this cpu after an idle period. So rq avg idle could be smaller than sched migration cost preventing the idle balance from happening. In this case we would be at the mercy of wakeup, periodic or nohz-idle load balancing to put another task on this cpu. To break this dependency towards rq avg idle make EAS idle balance independent from this rq avg idle has to be larger than sched migration cost. Signed-off-by: Dietmar Eggemann Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bfd407869ef0..5b89ec48ff9e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8452,8 +8452,9 @@ static int idle_balance(struct rq *this_rq) */ this_rq->idle_stamp = rq_clock(this_rq); - if (this_rq->avg_idle < sysctl_sched_migration_cost || - !this_rq->rd->overload) { + if (!energy_aware() && + (this_rq->avg_idle < sysctl_sched_migration_cost || + !this_rq->rd->overload)) { rcu_read_lock(); sd = rcu_dereference_check_sched_domain(this_rq->sd); if (sd) -- GitLab From 5cdeb5f0cfb8f1c36bf1d24e9a831ec8eaaca4cf Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Thu, 25 Feb 2016 12:43:49 +0000 Subject: [PATCH 1140/1442] ANDROID: sched: Add per-cpu max capacity to sched_group_capacity struct sched_group_capacity currently represents the compute capacity sum of all cpus in the sched_group. Unless it is divided by the group_weight to get the average capacity per cpu it hides differences in cpu capacity for mixed capacity systems (e.g. high RT/IRQ utilization or ARM big.LITTLE). But even the average may not be sufficient if the group covers cpus of different capacities. Instead, by extending struct sched_group_capacity to indicate max per-cpu capacity in the group a suitable group for a given task utilization can easily be found such that cpus with reduced capacity can be avoided for tasks with high utilization (not implemented by this patch). Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/core.c | 3 ++- kernel/sched/fair.c | 17 ++++++++++++----- kernel/sched/sched.h | 3 ++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 15a8e35f0198..81c343cf3c3b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5707,7 +5707,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_CONT " %*pbl", cpumask_pr_args(sched_group_cpus(group))); if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { - printk(KERN_CONT " (cpu_capacity = %d)", + printk(KERN_CONT " (cpu_capacity = %lu)", group->sgc->capacity); } @@ -6202,6 +6202,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) * die on a /0 trap. */ sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); + sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; /* * Make sure the first group of this domain contains the diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5b89ec48ff9e..4ec911886885 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7291,13 +7291,14 @@ skip_unlock: __attribute__ ((unused)); cpu_rq(cpu)->cpu_capacity = capacity; sdg->sgc->capacity = capacity; + sdg->sgc->max_capacity = capacity; } void update_group_capacity(struct sched_domain *sd, int cpu) { struct sched_domain *child = sd->child; struct sched_group *group, *sdg = sd->groups; - unsigned long capacity; + unsigned long capacity, max_capacity; unsigned long interval; interval = msecs_to_jiffies(sd->balance_interval); @@ -7310,6 +7311,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu) } capacity = 0; + max_capacity = 0; if (child->flags & SD_OVERLAP) { /* @@ -7334,11 +7336,12 @@ void update_group_capacity(struct sched_domain *sd, int cpu) */ if (unlikely(!rq->sd)) { capacity += capacity_of(cpu); - continue; + } else { + sgc = rq->sd->groups->sgc; + capacity += sgc->capacity; } - sgc = rq->sd->groups->sgc; - capacity += sgc->capacity; + max_capacity = max(capacity, max_capacity); } } else { /* @@ -7348,12 +7351,16 @@ void update_group_capacity(struct sched_domain *sd, int cpu) group = child->groups; do { - capacity += group->sgc->capacity; + struct sched_group_capacity *sgc = group->sgc; + + capacity += sgc->capacity; + max_capacity = max(sgc->max_capacity, max_capacity); group = group->next; } while (group != child->groups); } sdg->sgc->capacity = capacity; + sdg->sgc->max_capacity = max_capacity; } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d08f3b054194..8b9058507a24 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -905,7 +905,8 @@ struct sched_group_capacity { * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity * for a single CPU. */ - unsigned int capacity; + unsigned long capacity; + unsigned long max_capacity; /* Max per-cpu capacity in group */ unsigned long next_update; int imbalance; /* XXX unrelated to capacity but shared group state */ -- GitLab From 4c6a82485fddd56037c20c08d5b4c6fd6b8fa31c Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Thu, 25 Feb 2016 12:47:54 +0000 Subject: [PATCH 1141/1442] ANDROID: sched: Add group_misfit_task load-balance type To maximize throughput in systems with reduced capacity cpus (e.g. high RT/IRQ load and/or ARM big.LITTLE) load-balancing has to consider task and cpu utilization as well as per-cpu compute capacity when load-balancing in addition to the current average load based load-balancing policy. Tasks that are scheduled on a reduced capacity cpu need to be identified and migrated to a higher capacity cpu if possible. To implement this additional policy an additional group_type (load-balance scenario) is added: group_misfit_task. This represents scenarios where a sched_group has tasks that are not suitable for its per-cpu capacity. group_misfit_task is only considered if the system is not overloaded in any other way (group_imbalanced or group_overloaded). Identifying misfit tasks requires the rq lock to be held. To avoid taking remote rq locks to examine source sched_groups for misfit tasks, each cpu is responsible for tracking misfit tasks themselves and update the rq->misfit_task flag. This means checking task utilization when tasks are scheduled and on sched_tick. Signed-off-by: Morten Rasmussen Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 29 ++++++++++++++++++++++------- kernel/sched/sched.h | 1 + 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4ec911886885..5c61b5a70a17 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6421,6 +6421,8 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct pin_cookie c if (hrtick_enabled(rq)) hrtick_start_fair(rq, p); + rq->misfit_task = !task_fits_max(p, rq->cpu); + return p; simple: cfs_rq = &rq->cfs; @@ -6442,9 +6444,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct pin_cookie c if (hrtick_enabled(rq)) hrtick_start_fair(rq, p); + rq->misfit_task = !task_fits_max(p, rq->cpu); + return p; idle: + rq->misfit_task = 0; /* * This is OK, because current is on_cpu, which avoids it being picked * for load-balance and preemption/IRQs are still disabled avoiding @@ -6657,6 +6662,13 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10; enum fbq_type { regular, remote, all }; +enum group_type { + group_other = 0, + group_misfit_task, + group_imbalanced, + group_overloaded, +}; + #define LBF_ALL_PINNED 0x01 #define LBF_NEED_BREAK 0x02 #define LBF_DST_PINNED 0x04 @@ -7128,12 +7140,6 @@ static unsigned long task_h_load(struct task_struct *p) /********** Helpers for find_busiest_group ************************/ -enum group_type { - group_other = 0, - group_imbalanced, - group_overloaded, -}; - /* * sg_lb_stats - stats of a sched_group required for load_balancing */ @@ -7149,6 +7155,7 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; int group_no_capacity; + int group_misfit_task; /* A cpu has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; @@ -7465,6 +7472,9 @@ group_type group_classify(struct sched_group *group, if (sg_imbalanced(group)) return group_imbalanced; + if (sgs->group_misfit_task) + return group_misfit_task; + return group_other; } @@ -7516,8 +7526,11 @@ static inline void update_sg_lb_stats(struct lb_env *env, if (!nr_running && idle_cpu(i)) sgs->idle_cpus++; - if (cpu_overutilized(i)) + if (cpu_overutilized(i)) { *overutilized = true; + if (!sgs->group_misfit_task && rq->misfit_task) + sgs->group_misfit_task = capacity_of(i); + } } /* Adjust by relative CPU capacity of the group */ @@ -9083,6 +9096,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) rq->rd->overutilized = true; + + rq->misfit_task = !task_fits_max(curr, rq->cpu); } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 8b9058507a24..6c09ffe6d91e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -612,6 +612,7 @@ struct rq { #endif #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; + unsigned int misfit_task; #ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_SMP unsigned long last_load_update_tick; -- GitLab From dabce2bedc7771118b5720258e9cb50fc5b1d34d Mon Sep 17 00:00:00 2001 From: Michael Turquette Date: Tue, 30 Jun 2015 12:45:27 +0100 Subject: [PATCH 1142/1442] ANDROID: cpufreq: introduce cpufreq_driver_is_slow Some architectures and platforms perform CPU frequency transitions through a non-blocking method, while some might block or sleep. Even when frequency transitions do not block or sleep they may be very slow. This distinction is important when trying to change frequency from a non-interruptible context in a scheduler hot path. Describe this distinction with a cpufreq driver flag, CPUFREQ_DRIVER_FAST. The default is to not have this flag set, thus erring on the side of caution. cpufreq_driver_is_slow() is also introduced in this patch. Setting the above flag will allow this function to return false. [smuckle@linaro.org: change flag/API to include drivers that are too slow for scheduler hot paths, in addition to those that block/sleep] Cc: Rafael J. Wysocki Cc: Viresh Kumar Signed-off-by: Michael Turquette Signed-off-by: Steve Muckle Signed-off-by: Andres Oportus --- drivers/cpufreq/cpufreq.c | 6 ++++++ include/linux/cpufreq.h | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cbda4f836c8b..19ea8f545cec 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -117,6 +117,12 @@ bool have_governor_per_policy(void) } EXPORT_SYMBOL_GPL(have_governor_per_policy); +bool cpufreq_driver_is_slow(void) +{ + return !(cpufreq_driver->flags & CPUFREQ_DRIVER_FAST); +} +EXPORT_SYMBOL_GPL(cpufreq_driver_is_slow); + struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) { if (have_governor_per_policy()) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ebdc361e63bc..bed4f9326eca 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -177,6 +177,7 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); +bool cpufreq_driver_is_slow(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); @@ -359,6 +360,14 @@ struct cpufreq_driver { */ #define CPUFREQ_NEED_INITIAL_FREQ_CHECK (1 << 5) +/* + * Indicates that it is safe to call cpufreq_driver_target from + * non-interruptable context in scheduler hot paths. Drivers must + * opt-in to this flag, as the safe default is that they might sleep + * or be too slow for hot path use. + */ +#define CPUFREQ_DRIVER_FAST (1 << 6) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- GitLab From 3b6188e5c2083aaf1b8a01dc207d0155b85cac9f Mon Sep 17 00:00:00 2001 From: Michael Turquette Date: Tue, 30 Jun 2015 12:45:48 +0100 Subject: [PATCH 1143/1442] ANDROID: sched: scheduler-driven cpu frequency selection Scheduler-driven CPU frequency selection hopes to exploit both per-task and global information in the scheduler to improve frequency selection policy, achieving lower power consumption, improved responsiveness/performance, and less reliance on heuristics and tunables. For further discussion on the motivation of this integration see [0]. This patch implements a shim layer between the Linux scheduler and the cpufreq subsystem. The interface accepts capacity requests from the CFS, RT and deadline sched classes. The requests from each sched class are summed on each CPU with a margin applied to the CFS and RT capacity requests to provide some headroom. Deadline requests are expected to be precise enough given their nature to not require headroom. The maximum total capacity request for a CPU in a frequency domain drives the requested frequency for that domain. Policy is determined by both the sched classes and this shim layer. Note that this algorithm is event-driven. There is no polling loop to check cpu idle time nor any other method which is unsynchronized with the scheduler, aside from a throttling mechanism to ensure frequency changes are not attempted faster than the hardware can accommodate them. Thanks to Juri Lelli for contributing design ideas, code and test results, and to Ricky Liang for initialization and static key inc/dec fixes. [0] http://article.gmane.org/gmane.linux.kernel/1499836 [smuckle@linaro.org: various additions and fixes, revised commit text] Change-Id: I59a201a297931441d0d2146fc8342794474b4d37 CC: Ricky Liang Signed-off-by: Michael Turquette Signed-off-by: Juri Lelli Signed-off-by: Steve Muckle Signed-off-by: Andres Oportus --- drivers/cpufreq/Kconfig | 35 +- include/linux/cpufreq.h | 26 ++ include/linux/sched.h | 8 + kernel/sched/Makefile | 2 +- kernel/sched/cpufreq_sched.c | 344 ++++++++++++++++++ kernel/sched/cpufreq_schedutil.c | 576 ------------------------------- kernel/sched/sched.h | 51 +++ 7 files changed, 444 insertions(+), 598 deletions(-) create mode 100644 kernel/sched/cpufreq_sched.c delete mode 100644 kernel/sched/cpufreq_schedutil.c diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index d8b164a7c4e5..964853056137 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -102,15 +102,13 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE governor. If unsure have a look at the help section of the driver. Fallback governor will be the performance governor. -config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL - bool "schedutil" - depends on SMP - select CPU_FREQ_GOV_SCHEDUTIL - select CPU_FREQ_GOV_PERFORMANCE +config CPU_FREQ_DEFAULT_GOV_SCHED + bool "sched" + select CPU_FREQ_GOV_SCHED help - Use the 'schedutil' CPUFreq governor by default. If unsure, - have a look at the help section of that governor. The fallback - governor will be 'performance'. + Use the CPUfreq governor 'sched' as default. This scales + cpu frequency using CPU utilization estimates from the + scheduler. endchoice @@ -193,20 +191,15 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. -config CPU_FREQ_GOV_SCHEDUTIL - bool "'schedutil' cpufreq policy governor" - depends on CPU_FREQ && SMP - select CPU_FREQ_GOV_ATTR_SET - select IRQ_WORK +config CPU_FREQ_GOV_SCHED + bool "'sched' cpufreq governor" + depends on CPU_FREQ + select CPU_FREQ_GOV_COMMON help - This governor makes decisions based on the utilization data provided - by the scheduler. It sets the CPU frequency to be proportional to - the utilization/capacity ratio coming from the scheduler. If the - utilization is frequency-invariant, the new frequency is also - proportional to the maximum available frequency. If that is not the - case, it is proportional to the current frequency of the CPU. The - frequency tipping point is at utilization/capacity equal to 80% in - both cases. + 'sched' - this governor scales cpu frequency from the + scheduler as a function of cpu capacity utilization. It does + not evaluate utilization on a periodic basis (as ondemand + does) but instead is event-driven by the scheduler. If in doubt, say N. diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bed4f9326eca..cc57986d3bfe 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -562,6 +562,32 @@ struct governor_attr { ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf, size_t count); }; +/* CPUFREQ DEFAULT GOVERNOR */ +/* + * Performance governor is fallback governor if any other gov failed to auto + * load due latency restrictions + */ +#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE +extern struct cpufreq_governor cpufreq_gov_performance; +#endif +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) +extern struct cpufreq_governor cpufreq_gov_powersave; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) +extern struct cpufreq_governor cpufreq_gov_userspace; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) +extern struct cpufreq_governor cpufreq_gov_ondemand; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) +extern struct cpufreq_governor cpufreq_gov_conservative; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED) +extern struct cpufreq_governor cpufreq_gov_sched; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_sched) +#endif /********************************************************************* * FREQUENCY TABLE HELPERS * diff --git a/include/linux/sched.h b/include/linux/sched.h index 211de1d8fab9..074a7c0667eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -972,6 +972,14 @@ enum cpu_idle_type { #define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) +struct sched_capacity_reqs { + unsigned long cfs; + unsigned long rt; + unsigned long dl; + + unsigned long total; +}; + /* * Wake-queues are lists of tasks with a pending wakeup, whose * callers have already marked the task as woken internally, diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 8fdb2850564d..5aa8742f49b5 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -24,4 +24,4 @@ obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o -obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o +obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c new file mode 100644 index 000000000000..71b9f14875d4 --- /dev/null +++ b/kernel/sched/cpufreq_sched.c @@ -0,0 +1,344 @@ +/* + * Copyright (C) 2015 Michael Turquette + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "sched.h" + +#define THROTTLE_NSEC 50000000 /* 50ms default */ + +struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE; +static bool __read_mostly cpufreq_driver_slow; + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED +static struct cpufreq_governor cpufreq_gov_sched; +#endif + +static DEFINE_PER_CPU(unsigned long, enabled); +DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); + +/** + * gov_data - per-policy data internal to the governor + * @throttle: next throttling period expiry. Derived from throttle_nsec + * @throttle_nsec: throttle period length in nanoseconds + * @task: worker thread for dvfs transition that may block/sleep + * @irq_work: callback used to wake up worker thread + * @requested_freq: last frequency requested by the sched governor + * + * struct gov_data is the per-policy cpufreq_sched-specific data structure. A + * per-policy instance of it is created when the cpufreq_sched governor receives + * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data + * member of struct cpufreq_policy. + * + * Readers of this data must call down_read(policy->rwsem). Writers must + * call down_write(policy->rwsem). + */ +struct gov_data { + ktime_t throttle; + unsigned int throttle_nsec; + struct task_struct *task; + struct irq_work irq_work; + unsigned int requested_freq; +}; + +static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, + unsigned int freq) +{ + struct gov_data *gd = policy->governor_data; + + /* avoid race with cpufreq_sched_stop */ + if (!down_write_trylock(&policy->rwsem)) + return; + + __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); + + gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec); + up_write(&policy->rwsem); +} + +static bool finish_last_request(struct gov_data *gd) +{ + ktime_t now = ktime_get(); + + if (ktime_after(now, gd->throttle)) + return false; + + while (1) { + int usec_left = ktime_to_ns(ktime_sub(gd->throttle, now)); + + usec_left /= NSEC_PER_USEC; + usleep_range(usec_left, usec_left + 100); + now = ktime_get(); + if (ktime_after(now, gd->throttle)) + return true; + } +} + +/* + * we pass in struct cpufreq_policy. This is safe because changing out the + * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP), + * which tears down all of the data structures and __cpufreq_governor(policy, + * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the + * new policy pointer + */ +static int cpufreq_sched_thread(void *data) +{ + struct sched_param param; + struct cpufreq_policy *policy; + struct gov_data *gd; + unsigned int new_request = 0; + unsigned int last_request = 0; + int ret; + + policy = (struct cpufreq_policy *) data; + gd = policy->governor_data; + + param.sched_priority = 50; + ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m); + if (ret) { + pr_warn("%s: failed to set SCHED_FIFO\n", __func__); + do_exit(-EINVAL); + } else { + pr_debug("%s: kthread (%d) set to SCHED_FIFO\n", + __func__, gd->task->pid); + } + + do { + set_current_state(TASK_INTERRUPTIBLE); + new_request = gd->requested_freq; + if (new_request == last_request) { + schedule(); + } else { + /* + * if the frequency thread sleeps while waiting to be + * unthrottled, start over to check for a newer request + */ + if (finish_last_request(gd)) + continue; + last_request = new_request; + cpufreq_sched_try_driver_target(policy, new_request); + } + } while (!kthread_should_stop()); + + return 0; +} + +static void cpufreq_sched_irq_work(struct irq_work *irq_work) +{ + struct gov_data *gd; + + gd = container_of(irq_work, struct gov_data, irq_work); + if (!gd) + return; + + wake_up_process(gd->task); +} + +static void update_fdomain_capacity_request(int cpu) +{ + unsigned int freq_new, index_new, cpu_tmp; + struct cpufreq_policy *policy; + struct gov_data *gd; + unsigned long capacity = 0; + + /* + * Avoid grabbing the policy if possible. A test is still + * required after locking the CPU's policy to avoid racing + * with the governor changing. + */ + if (!per_cpu(enabled, cpu)) + return; + + policy = cpufreq_cpu_get(cpu); + if (IS_ERR_OR_NULL(policy)) + return; + + if (policy->governor != &cpufreq_gov_sched || + !policy->governor_data) + goto out; + + gd = policy->governor_data; + + /* find max capacity requested by cpus in this policy */ + for_each_cpu(cpu_tmp, policy->cpus) { + struct sched_capacity_reqs *scr; + + scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp); + capacity = max(capacity, scr->total); + } + + /* Convert the new maximum capacity request into a cpu frequency */ + freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT; + index_new = cpufreq_frequency_table_target(policy, freq_new, CPUFREQ_RELATION_L); + freq_new = policy->freq_table[index_new].frequency; + + if (freq_new == gd->requested_freq) + goto out; + + gd->requested_freq = freq_new; + + /* + * Throttling is not yet supported on platforms with fast cpufreq + * drivers. + */ + if (cpufreq_driver_slow) + irq_work_queue_on(&gd->irq_work, cpu); + else + cpufreq_sched_try_driver_target(policy, freq_new); + +out: + cpufreq_cpu_put(policy); +} + +void update_cpu_capacity_request(int cpu, bool request) +{ + unsigned long new_capacity; + struct sched_capacity_reqs *scr; + + /* The rq lock serializes access to the CPU's sched_capacity_reqs. */ + lockdep_assert_held(&cpu_rq(cpu)->lock); + + scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + + new_capacity = scr->cfs + scr->rt; + new_capacity = new_capacity * capacity_margin + / SCHED_CAPACITY_SCALE; + new_capacity += scr->dl; + + if (new_capacity == scr->total) + return; + + scr->total = new_capacity; + if (request) + update_fdomain_capacity_request(cpu); +} + +static inline void set_sched_freq(void) +{ + static_key_slow_inc(&__sched_freq); +} + +static inline void clear_sched_freq(void) +{ + static_key_slow_dec(&__sched_freq); +} + +static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) +{ + struct gov_data *gd; + int cpu; + + for_each_cpu(cpu, policy->cpus) + memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0, + sizeof(struct sched_capacity_reqs)); + + gd = kzalloc(sizeof(*gd), GFP_KERNEL); + if (!gd) + return -ENOMEM; + + gd->throttle_nsec = policy->cpuinfo.transition_latency ? + policy->cpuinfo.transition_latency : + THROTTLE_NSEC; + pr_debug("%s: throttle threshold = %u [ns]\n", + __func__, gd->throttle_nsec); + + if (cpufreq_driver_is_slow()) { + cpufreq_driver_slow = true; + gd->task = kthread_create(cpufreq_sched_thread, policy, + "kschedfreq:%d", + cpumask_first(policy->related_cpus)); + if (IS_ERR_OR_NULL(gd->task)) { + pr_err("%s: failed to create kschedfreq thread\n", + __func__); + goto err; + } + get_task_struct(gd->task); + kthread_bind_mask(gd->task, policy->related_cpus); + wake_up_process(gd->task); + init_irq_work(&gd->irq_work, cpufreq_sched_irq_work); + } + + policy->governor_data = gd; + set_sched_freq(); + + return 0; + +err: + kfree(gd); + return -ENOMEM; +} + +static void cpufreq_sched_policy_exit(struct cpufreq_policy *policy) +{ + struct gov_data *gd = policy->governor_data; + + clear_sched_freq(); + if (cpufreq_driver_slow) { + kthread_stop(gd->task); + put_task_struct(gd->task); + } + + policy->governor_data = NULL; + + kfree(gd); +} + +static int cpufreq_sched_start(struct cpufreq_policy *policy) +{ + int cpu; + + for_each_cpu(cpu, policy->cpus) + per_cpu(enabled, cpu) = 1; + + return 0; +} + +static void cpufreq_sched_stop(struct cpufreq_policy *policy) +{ + int cpu; + + for_each_cpu(cpu, policy->cpus) + per_cpu(enabled, cpu) = 0; +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED +static +#endif +struct cpufreq_governor cpufreq_gov_sched = { + .name = "sched", + .init = cpufreq_sched_policy_init, + .exit = cpufreq_sched_policy_exit, + .start = cpufreq_sched_start, + .stop = cpufreq_sched_stop, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_sched_init(void) +{ + int cpu; + + for_each_cpu(cpu, cpu_possible_mask) + per_cpu(enabled, cpu) = 0; + return cpufreq_register_governor(&cpufreq_gov_sched); +} + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_sched; +} +#endif + +/* Try to make this the default governor */ +fs_initcall(cpufreq_sched_init); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c deleted file mode 100644 index 69e06898997d..000000000000 --- a/kernel/sched/cpufreq_schedutil.c +++ /dev/null @@ -1,576 +0,0 @@ -/* - * CPUFreq governor based on scheduler-provided CPU utilization data. - * - * Copyright (C) 2016, Intel Corporation - * Author: Rafael J. Wysocki - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include - -#include "sched.h" - -struct sugov_tunables { - struct gov_attr_set attr_set; - unsigned int rate_limit_us; -}; - -struct sugov_policy { - struct cpufreq_policy *policy; - - struct sugov_tunables *tunables; - struct list_head tunables_hook; - - raw_spinlock_t update_lock; /* For shared policies */ - u64 last_freq_update_time; - s64 freq_update_delay_ns; - unsigned int next_freq; - - /* The next fields are only needed if fast switch cannot be used. */ - struct irq_work irq_work; - struct work_struct work; - struct mutex work_lock; - bool work_in_progress; - - bool need_freq_update; -}; - -struct sugov_cpu { - struct update_util_data update_util; - struct sugov_policy *sg_policy; - - unsigned int cached_raw_freq; - unsigned long iowait_boost; - unsigned long iowait_boost_max; - u64 last_update; - - /* The fields below are only needed when sharing a policy. */ - unsigned long util; - unsigned long max; - unsigned int flags; -}; - -static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); - -/************************ Governor internals ***********************/ - -static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) -{ - s64 delta_ns; - - if (sg_policy->work_in_progress) - return false; - - if (unlikely(sg_policy->need_freq_update)) { - sg_policy->need_freq_update = false; - /* - * This happens when limits change, so forget the previous - * next_freq value and force an update. - */ - sg_policy->next_freq = UINT_MAX; - return true; - } - - delta_ns = time - sg_policy->last_freq_update_time; - return delta_ns >= sg_policy->freq_update_delay_ns; -} - -static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, - unsigned int next_freq) -{ - struct cpufreq_policy *policy = sg_policy->policy; - - sg_policy->last_freq_update_time = time; - - if (policy->fast_switch_enabled) { - if (sg_policy->next_freq == next_freq) { - trace_cpu_frequency(policy->cur, smp_processor_id()); - return; - } - sg_policy->next_freq = next_freq; - next_freq = cpufreq_driver_fast_switch(policy, next_freq); - if (next_freq == CPUFREQ_ENTRY_INVALID) - return; - - policy->cur = next_freq; - trace_cpu_frequency(next_freq, smp_processor_id()); - } else if (sg_policy->next_freq != next_freq) { - sg_policy->next_freq = next_freq; - sg_policy->work_in_progress = true; - irq_work_queue(&sg_policy->irq_work); - } -} - -/** - * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. - * @util: Current CPU utilization. - * @max: CPU capacity. - * - * If the utilization is frequency-invariant, choose the new frequency to be - * proportional to it, that is - * - * next_freq = C * max_freq * util / max - * - * Otherwise, approximate the would-be frequency-invariant utilization by - * util_raw * (curr_freq / max_freq) which leads to - * - * next_freq = C * curr_freq * util_raw / max - * - * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. - * - * The lowest driver-supported frequency which is equal or greater than the raw - * next_freq (as calculated above) is returned, subject to policy min/max and - * cpufreq driver limitations. - */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) -{ - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - struct cpufreq_policy *policy = sg_policy->policy; - unsigned int freq = arch_scale_freq_invariant() ? - policy->cpuinfo.max_freq : policy->cur; - - freq = (freq + (freq >> 2)) * util / max; - - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) - return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; - return cpufreq_driver_resolve_freq(policy, freq); -} - -static void sugov_get_util(unsigned long *util, unsigned long *max) -{ - struct rq *rq = this_rq(); - unsigned long cfs_max; - - cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); - - *util = min(rq->cfs.avg.util_avg, cfs_max); - *max = cfs_max; -} - -static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, - unsigned int flags) -{ - if (flags & SCHED_CPUFREQ_IOWAIT) { - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; - } else if (sg_cpu->iowait_boost) { - s64 delta_ns = time - sg_cpu->last_update; - - /* Clear iowait_boost if the CPU apprears to have been idle. */ - if (delta_ns > TICK_NSEC) - sg_cpu->iowait_boost = 0; - } -} - -static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, - unsigned long *max) -{ - unsigned long boost_util = sg_cpu->iowait_boost; - unsigned long boost_max = sg_cpu->iowait_boost_max; - - if (!boost_util) - return; - - if (*util * boost_max < *max * boost_util) { - *util = boost_util; - *max = boost_max; - } - sg_cpu->iowait_boost >>= 1; -} - -static void sugov_update_single(struct update_util_data *hook, u64 time, - unsigned int flags) -{ - struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util, max; - unsigned int next_f; - - sugov_set_iowait_boost(sg_cpu, time, flags); - sg_cpu->last_update = time; - - if (!sugov_should_update_freq(sg_policy, time)) - return; - - if (flags & SCHED_CPUFREQ_RT_DL) { - next_f = policy->cpuinfo.max_freq; - } else { - sugov_get_util(&util, &max); - sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); - } - sugov_update_commit(sg_policy, time, next_f); -} - -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max, - unsigned int flags) -{ - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - struct cpufreq_policy *policy = sg_policy->policy; - unsigned int max_f = policy->cpuinfo.max_freq; - u64 last_freq_update_time = sg_policy->last_freq_update_time; - unsigned int j; - - if (flags & SCHED_CPUFREQ_RT_DL) - return max_f; - - sugov_iowait_boost(sg_cpu, &util, &max); - - for_each_cpu(j, policy->cpus) { - struct sugov_cpu *j_sg_cpu; - unsigned long j_util, j_max; - s64 delta_ns; - - if (j == smp_processor_id()) - continue; - - j_sg_cpu = &per_cpu(sugov_cpu, j); - /* - * If the CPU utilization was last updated before the previous - * frequency update and the time elapsed between the last update - * of the CPU utilization and the last frequency update is long - * enough, don't take the CPU into account as it probably is - * idle now (and clear iowait_boost for it). - */ - delta_ns = last_freq_update_time - j_sg_cpu->last_update; - if (delta_ns > TICK_NSEC) { - j_sg_cpu->iowait_boost = 0; - continue; - } - if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) - return max_f; - - j_util = j_sg_cpu->util; - j_max = j_sg_cpu->max; - if (j_util * max > j_max * util) { - util = j_util; - max = j_max; - } - - sugov_iowait_boost(j_sg_cpu, &util, &max); - } - - return get_next_freq(sg_cpu, util, max); -} - -static void sugov_update_shared(struct update_util_data *hook, u64 time, - unsigned int flags) -{ - struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); - struct sugov_policy *sg_policy = sg_cpu->sg_policy; - unsigned long util, max; - unsigned int next_f; - - sugov_get_util(&util, &max); - - raw_spin_lock(&sg_policy->update_lock); - - sg_cpu->util = util; - sg_cpu->max = max; - sg_cpu->flags = flags; - - sugov_set_iowait_boost(sg_cpu, time, flags); - sg_cpu->last_update = time; - - if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); - sugov_update_commit(sg_policy, time, next_f); - } - - raw_spin_unlock(&sg_policy->update_lock); -} - -static void sugov_work(struct work_struct *work) -{ - struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); - - mutex_lock(&sg_policy->work_lock); - __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq, - CPUFREQ_RELATION_L); - mutex_unlock(&sg_policy->work_lock); - - sg_policy->work_in_progress = false; -} - -static void sugov_irq_work(struct irq_work *irq_work) -{ - struct sugov_policy *sg_policy; - - sg_policy = container_of(irq_work, struct sugov_policy, irq_work); - schedule_work_on(smp_processor_id(), &sg_policy->work); -} - -/************************** sysfs interface ************************/ - -static struct sugov_tunables *global_tunables; -static DEFINE_MUTEX(global_tunables_lock); - -static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set) -{ - return container_of(attr_set, struct sugov_tunables, attr_set); -} - -static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) -{ - struct sugov_tunables *tunables = to_sugov_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->rate_limit_us); -} - -static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, - size_t count) -{ - struct sugov_tunables *tunables = to_sugov_tunables(attr_set); - struct sugov_policy *sg_policy; - unsigned int rate_limit_us; - - if (kstrtouint(buf, 10, &rate_limit_us)) - return -EINVAL; - - tunables->rate_limit_us = rate_limit_us; - - list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) - sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC; - - return count; -} - -static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); - -static struct attribute *sugov_attributes[] = { - &rate_limit_us.attr, - NULL -}; - -static struct kobj_type sugov_tunables_ktype = { - .default_attrs = sugov_attributes, - .sysfs_ops = &governor_sysfs_ops, -}; - -/********************** cpufreq governor interface *********************/ - -static struct cpufreq_governor schedutil_gov; - -static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy; - - sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL); - if (!sg_policy) - return NULL; - - sg_policy->policy = policy; - init_irq_work(&sg_policy->irq_work, sugov_irq_work); - INIT_WORK(&sg_policy->work, sugov_work); - mutex_init(&sg_policy->work_lock); - raw_spin_lock_init(&sg_policy->update_lock); - return sg_policy; -} - -static void sugov_policy_free(struct sugov_policy *sg_policy) -{ - mutex_destroy(&sg_policy->work_lock); - kfree(sg_policy); -} - -static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) -{ - struct sugov_tunables *tunables; - - tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); - if (tunables) { - gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook); - if (!have_governor_per_policy()) - global_tunables = tunables; - } - return tunables; -} - -static void sugov_tunables_free(struct sugov_tunables *tunables) -{ - if (!have_governor_per_policy()) - global_tunables = NULL; - - kfree(tunables); -} - -static int sugov_init(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy; - struct sugov_tunables *tunables; - unsigned int lat; - int ret = 0; - - /* State should be equivalent to EXIT */ - if (policy->governor_data) - return -EBUSY; - - sg_policy = sugov_policy_alloc(policy); - if (!sg_policy) - return -ENOMEM; - - mutex_lock(&global_tunables_lock); - - if (global_tunables) { - if (WARN_ON(have_governor_per_policy())) { - ret = -EINVAL; - goto free_sg_policy; - } - policy->governor_data = sg_policy; - sg_policy->tunables = global_tunables; - - gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook); - goto out; - } - - tunables = sugov_tunables_alloc(sg_policy); - if (!tunables) { - ret = -ENOMEM; - goto free_sg_policy; - } - - tunables->rate_limit_us = LATENCY_MULTIPLIER; - lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (lat) - tunables->rate_limit_us *= lat; - - policy->governor_data = sg_policy; - sg_policy->tunables = tunables; - - ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype, - get_governor_parent_kobj(policy), "%s", - schedutil_gov.name); - if (ret) - goto fail; - - out: - mutex_unlock(&global_tunables_lock); - - cpufreq_enable_fast_switch(policy); - return 0; - - fail: - policy->governor_data = NULL; - sugov_tunables_free(tunables); - - free_sg_policy: - mutex_unlock(&global_tunables_lock); - - sugov_policy_free(sg_policy); - pr_err("initialization failed (error %d)\n", ret); - return ret; -} - -static void sugov_exit(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy = policy->governor_data; - struct sugov_tunables *tunables = sg_policy->tunables; - unsigned int count; - - cpufreq_disable_fast_switch(policy); - - mutex_lock(&global_tunables_lock); - - count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); - policy->governor_data = NULL; - if (!count) - sugov_tunables_free(tunables); - - mutex_unlock(&global_tunables_lock); - - sugov_policy_free(sg_policy); -} - -static int sugov_start(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy = policy->governor_data; - unsigned int cpu; - - sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; - sg_policy->last_freq_update_time = 0; - sg_policy->next_freq = UINT_MAX; - sg_policy->work_in_progress = false; - sg_policy->need_freq_update = false; - - for_each_cpu(cpu, policy->cpus) { - struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); - - sg_cpu->sg_policy = sg_policy; - if (policy_is_shared(policy)) { - sg_cpu->util = 0; - sg_cpu->max = 0; - sg_cpu->flags = SCHED_CPUFREQ_RT; - sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_shared); - } else { - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_single); - } - } - return 0; -} - -static void sugov_stop(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy = policy->governor_data; - unsigned int cpu; - - for_each_cpu(cpu, policy->cpus) - cpufreq_remove_update_util_hook(cpu); - - synchronize_sched(); - - irq_work_sync(&sg_policy->irq_work); - cancel_work_sync(&sg_policy->work); -} - -static void sugov_limits(struct cpufreq_policy *policy) -{ - struct sugov_policy *sg_policy = policy->governor_data; - - if (!policy->fast_switch_enabled) { - mutex_lock(&sg_policy->work_lock); - cpufreq_policy_apply_limits(policy); - mutex_unlock(&sg_policy->work_lock); - } - - sg_policy->need_freq_update = true; -} - -static struct cpufreq_governor schedutil_gov = { - .name = "schedutil", - .owner = THIS_MODULE, - .init = sugov_init, - .exit = sugov_exit, - .start = sugov_start, - .stop = sugov_stop, - .limits = sugov_limits, -}; - -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL -struct cpufreq_governor *cpufreq_default_governor(void) -{ - return &schedutil_gov; -} -#endif - -static int __init sugov_register(void) -{ - return cpufreq_register_governor(&schedutil_gov); -} -fs_initcall(sugov_register); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6c09ffe6d91e..9a7840fb8d7e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1520,6 +1520,57 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) } #endif +#ifdef CONFIG_CPU_FREQ_GOV_SCHED +extern unsigned int capacity_margin; +extern struct static_key __sched_freq; + +static inline bool sched_freq(void) +{ + return static_key_false(&__sched_freq); +} + +DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); +void update_cpu_capacity_request(int cpu, bool request); + +static inline void set_cfs_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ + if (per_cpu(cpu_sched_capacity_reqs, cpu).cfs != capacity) { + per_cpu(cpu_sched_capacity_reqs, cpu).cfs = capacity; + update_cpu_capacity_request(cpu, request); + } +} + +static inline void set_rt_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ + if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) { + per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity; + update_cpu_capacity_request(cpu, request); + } +} + +static inline void set_dl_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ + if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) { + per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity; + update_cpu_capacity_request(cpu, request); + } +} +#else +static inline bool sched_freq(void) { return false; } +static inline void set_cfs_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ } +static inline void set_rt_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ } +static inline void set_dl_cpu_capacity(int cpu, bool request, + unsigned long capacity) +{ } +#endif + static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); -- GitLab From 4585a266e09dbda8da216fdf92d5999e13ae6537 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 19 Aug 2015 19:47:12 +0100 Subject: [PATCH 1144/1442] ANDROID: sched/fair: add triggers for OPP change requests Each time a task is {en,de}queued we might need to adapt the current frequency to the new usage. Add triggers on {en,de}queue_task_fair() for this purpose. Only trigger a freq request if we are effectively waking up or going to sleep. Filter out load balancing related calls to reduce the number of triggers. [smuckle@linaro.org: resolve merge conflicts, define task_new, use renamed static key sched_freq] cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Juri Lelli Signed-off-by: Steve Muckle Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 46 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5c61b5a70a17..7c1732630f45 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4528,6 +4528,21 @@ static inline void hrtick_update(struct rq *rq) } #endif +static unsigned long capacity_orig_of(int cpu); +static int cpu_util(int cpu); + +static void update_capacity_of(int cpu) +{ + unsigned long req_cap; + + if (!sched_freq()) + return; + + /* Convert scale-invariant capacity to cpu. */ + req_cap = cpu_util(cpu) * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); + set_cfs_cpu_capacity(cpu, true, req_cap); +} + static bool cpu_overutilized(int cpu); /* @@ -4585,6 +4600,20 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!task_new && !rq->rd->overutilized && cpu_overutilized(rq->cpu)) rq->rd->overutilized = true; + + /* + * We want to potentially trigger a freq switch + * request only for tasks that are waking up; this is + * because we get here also during load balancing, but + * in these cases it seems wise to trigger as single + * request after load balancing is done. + * + * XXX: how about fork()? Do we need a special + * flag/something to tell if we are here after a + * fork() (wakeup_task_new)? + */ + if (!task_new) + update_capacity_of(cpu_of(rq)); } hrtick_update(rq); } @@ -4642,9 +4671,24 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) update_cfs_shares(cfs_rq); } - if (!se) + if (!se) { sub_nr_running(rq, 1); + /* + * We want to potentially trigger a freq switch + * request only for tasks that are going to sleep; + * this is because we get here also during load + * balancing, but in these cases it seems wise to + * trigger as single request after load balancing is + * done. + */ + if (task_sleep) { + if (rq->cfs.nr_running) + update_capacity_of(cpu_of(rq)); + else if (sched_freq()) + set_cfs_cpu_capacity(cpu_of(rq), false, 0); + } + } hrtick_update(rq); } -- GitLab From 43aac8939996d36140a4054b14afd31cb184127c Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Fri, 26 Jun 2015 12:14:23 +0100 Subject: [PATCH 1145/1442] ANDROID: sched/{core,fair}: trigger OPP change request on fork() Patch "sched/fair: add triggers for OPP change requests" introduced OPP change triggers for enqueue_task_fair(), but the trigger was operating only for wakeups. Fact is that it makes sense to consider wakeup_new also (i.e., fork()), as we don't know anything about a newly created task and thus we most certainly want to jump to max OPP to not harm performance too much. However, it is not currently possible (or at least it wasn't evident to me how to do so :/) to tell new wakeups from other (non wakeup) operations. This patch introduces an additional flag in sched.h that is only set at fork() time and it is then consumed in enqueue_task_fair() for our purpose. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Juri Lelli Signed-off-by: Steve Muckle Signed-off-by: Andres Oportus --- kernel/sched/core.c | 2 +- kernel/sched/fair.c | 11 ++++------- kernel/sched/sched.h | 1 + 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 81c343cf3c3b..9c3a9a460ce6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2579,7 +2579,7 @@ void wake_up_new_task(struct task_struct *p) rq = __task_rq_lock(p, &rf); post_init_entity_util_avg(&p->se); - activate_task(rq, p, 0); + activate_task(rq, p, ENQUEUE_WAKEUP_NEW); p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); check_preempt_curr(rq, p, WF_FORK); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7c1732630f45..1ba983f82376 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4529,7 +4529,7 @@ static inline void hrtick_update(struct rq *rq) #endif static unsigned long capacity_orig_of(int cpu); -static int cpu_util(int cpu); +static unsigned long cpu_util(int cpu); static void update_capacity_of(int cpu) { @@ -4555,7 +4555,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; - int task_new = !(flags & ENQUEUE_WAKEUP); + int task_new = flags & ENQUEUE_WAKEUP_NEW; + int task_wakeup = flags & ENQUEUE_WAKEUP; /* * If in_iowait is set, the code below may not trigger any cpufreq @@ -4607,12 +4608,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * because we get here also during load balancing, but * in these cases it seems wise to trigger as single * request after load balancing is done. - * - * XXX: how about fork()? Do we need a special - * flag/something to tell if we are here after a - * fork() (wakeup_task_new)? */ - if (!task_new) + if (task_new || task_wakeup) update_capacity_of(cpu_of(rq)); } hrtick_update(rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9a7840fb8d7e..7a1f5137f0e5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1229,6 +1229,7 @@ extern const u32 sched_prio_to_wmult[40]; #else #define ENQUEUE_MIGRATED 0x00 #endif +#define ENQUEUE_WAKEUP_NEW 0x40 #define RETRY_TASK ((void *)-1UL) -- GitLab From c9bf15a39afe75147772e9c6e895bbaf7e16a434 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 25 Jun 2015 14:37:27 +0100 Subject: [PATCH 1146/1442] ANDROID: sched/fair: cpufreq_sched triggers for load balancing As we don't trigger freq changes from {en,de}queue_task_fair() during load balancing, we need to do explicitly so on load balancing paths. [smuckle@linaro.org: move update_capacity_of calls so rq lock is held] cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Juri Lelli Signed-off-by: Steve Muckle Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1ba983f82376..0b45d15ecbfb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7065,6 +7065,10 @@ static void attach_one_task(struct rq *rq, struct task_struct *p) { raw_spin_lock(&rq->lock); attach_task(rq, p); + /* + * We want to potentially raise target_cpu's OPP. + */ + update_capacity_of(cpu_of(rq)); raw_spin_unlock(&rq->lock); } @@ -7086,6 +7090,11 @@ static void attach_tasks(struct lb_env *env) attach_task(env->dst_rq, p); } + /* + * We want to potentially raise env.dst_cpu's OPP. + */ + update_capacity_of(env->dst_cpu); + raw_spin_unlock(&env->dst_rq->lock); } @@ -8284,6 +8293,11 @@ static int load_balance(int this_cpu, struct rq *this_rq, * ld_moved - cumulative load moved across iterations */ cur_ld_moved = detach_tasks(&env); + /* + * We want to potentially lower env.src_cpu's OPP. + */ + if (cur_ld_moved) + update_capacity_of(env.src_cpu); /* * We've detached some tasks from busiest_rq. Every @@ -8650,6 +8664,10 @@ static int active_load_balance_cpu_stop(void *data) p = detach_one_task(&env); if (p) { schedstat_inc(sd->alb_pushed); + /* + * We want to potentially lower env.src_cpu's OPP. + */ + update_capacity_of(env.src_cpu); /* Active balancing done, reset the failure counter. */ sd->nr_balance_failed = 0; } else { -- GitLab From 608d49484ee466d9ef89bd70867f850311a1e1c4 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Thu, 25 Jun 2015 14:12:33 +0100 Subject: [PATCH 1147/1442] ANDROID: sched/fair: jump to max OPP when crossing UP threshold Since the true utilization of a long running task is not detectable while it is running and might be bigger than the current cpu capacity, create the maximum cpu capacity head room by requesting the maximum cpu capacity once the cpu usage plus the capacity margin exceeds the current capacity. This is also done to try to harm the performance of a task the least. Original fair-class only version authored by Juri Lelli . cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Juri Lelli Signed-off-by: Steve Muckle Signed-off-by: Andres Oportus --- kernel/sched/core.c | 41 ++++++++++++++++++++++++++ kernel/sched/fair.c | 63 ---------------------------------------- kernel/sched/sched.h | 68 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 63 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9c3a9a460ce6..8e60c55be0c8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3068,6 +3068,45 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } +#ifdef CONFIG_CPU_FREQ_GOV_SCHED +static unsigned long sum_capacity_reqs(unsigned long cfs_cap, + struct sched_capacity_reqs *scr) +{ + unsigned long total = cfs_cap + scr->rt; + + total = total * capacity_margin; + total /= SCHED_CAPACITY_SCALE; + total += scr->dl; + return total; +} + +static void sched_freq_tick(int cpu) +{ + struct sched_capacity_reqs *scr; + unsigned long capacity_orig, capacity_curr; + + if (!sched_freq()) + return; + + capacity_orig = capacity_orig_of(cpu); + capacity_curr = capacity_curr_of(cpu); + if (capacity_curr == capacity_orig) + return; + + /* + * To make free room for a task that is building up its "real" + * utilization and to harm its performance the least, request + * a jump to max OPP as soon as the margin of free capacity is + * impacted (specified by capacity_margin). + */ + scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + if (capacity_curr < sum_capacity_reqs(cpu_util(cpu), scr)) + set_cfs_cpu_capacity(cpu, true, capacity_max); +} +#else +static inline void sched_freq_tick(int cpu) { } +#endif + /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3094,6 +3133,8 @@ void scheduler_tick(void) trigger_load_balance(rq); #endif rq_last_tick_reset(rq); + + sched_freq_tick(cpu); } #ifdef CONFIG_NO_HZ_FULL diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0b45d15ecbfb..4b4f5fcd7180 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4992,15 +4992,6 @@ static unsigned long target_load(int cpu, int type) return max(rq->cpu_load[type-1], total); } -static unsigned long capacity_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity; -} - -static unsigned long capacity_orig_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity_orig; -} static unsigned long cpu_avg_load_per_task(int cpu) { @@ -5151,60 +5142,6 @@ static void record_wakee(struct task_struct *p) } } -/* - * Returns the current capacity of cpu after applying both - * cpu and freq scaling. - */ -static unsigned long capacity_curr_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity_orig * - arch_scale_freq_capacity(NULL, cpu) - >> SCHED_CAPACITY_SHIFT; -} - -/* - * cpu_util returns the amount of capacity of a CPU that is used by CFS - * tasks. The unit of the return value must be the one of capacity so we can - * compare the utilization with the capacity of the CPU that is available for - * CFS task (ie cpu_capacity). - * - * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the - * recent utilization of currently non-runnable tasks on a CPU. It represents - * the amount of utilization of a CPU in the range [0..capacity_orig] where - * capacity_orig is the cpu_capacity available at the highest frequency - * (arch_scale_freq_capacity()). - * The utilization of a CPU converges towards a sum equal to or less than the - * current capacity (capacity_curr <= capacity_orig) of the CPU because it is - * the running time on this CPU scaled by capacity_curr. - * - * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even - * higher than capacity_orig because of unfortunate rounding in - * cfs.avg.util_avg or just after migrating tasks and new task wakeups until - * the average stabilizes with the new running time. We need to check that the - * utilization stays within the range of [0..capacity_orig] and cap it if - * necessary. Without utilization capping, a group could be seen as overloaded - * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of - * available capacity. We allow utilization to overshoot capacity_curr (but not - * capacity_orig) as it useful for predicting the capacity required after task - * migrations (scheduler-driven DVFS). - */ -static unsigned long __cpu_util(int cpu, int delta) -{ - unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; - unsigned long capacity = capacity_orig_of(cpu); - - delta += util; - if (delta < 0) - return 0; - - return (delta >= capacity) ? capacity : delta; -} - -static unsigned long cpu_util(int cpu) -{ - return __cpu_util(cpu, 0); -} - static inline bool energy_aware(void) { return sched_feat(ENERGY_AWARE); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7a1f5137f0e5..7c59c9b4e7ea 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1521,7 +1521,75 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) } #endif +#ifdef CONFIG_SMP +static inline unsigned long capacity_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity; +} + +static inline unsigned long capacity_orig_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity_orig; +} + +/* + * cpu_util returns the amount of capacity of a CPU that is used by CFS + * tasks. The unit of the return value must be the one of capacity so we can + * compare the utilization with the capacity of the CPU that is available for + * CFS task (ie cpu_capacity). + * + * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the + * recent utilization of currently non-runnable tasks on a CPU. It represents + * the amount of utilization of a CPU in the range [0..capacity_orig] where + * capacity_orig is the cpu_capacity available at the highest frequency + * (arch_scale_freq_capacity()). + * The utilization of a CPU converges towards a sum equal to or less than the + * current capacity (capacity_curr <= capacity_orig) of the CPU because it is + * the running time on this CPU scaled by capacity_curr. + * + * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even + * higher than capacity_orig because of unfortunate rounding in + * cfs.avg.util_avg or just after migrating tasks and new task wakeups until + * the average stabilizes with the new running time. We need to check that the + * utilization stays within the range of [0..capacity_orig] and cap it if + * necessary. Without utilization capping, a group could be seen as overloaded + * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of + * available capacity. We allow utilization to overshoot capacity_curr (but not + * capacity_orig) as it useful for predicting the capacity required after task + * migrations (scheduler-driven DVFS). + */ +static inline unsigned long __cpu_util(int cpu, int delta) +{ + unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; + unsigned long capacity = capacity_orig_of(cpu); + + delta += util; + if (delta < 0) + return 0; + + return (delta >= capacity) ? capacity : delta; +} + +static inline unsigned long cpu_util(int cpu) +{ + return __cpu_util(cpu, 0); +} + +/* + * Returns the current capacity of cpu after applying both + * cpu and freq scaling. + */ +static inline unsigned long capacity_curr_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity_orig * + arch_scale_freq_capacity(NULL, cpu) + >> SCHED_CAPACITY_SHIFT; +} + +#endif + #ifdef CONFIG_CPU_FREQ_GOV_SCHED +#define capacity_max SCHED_CAPACITY_SCALE extern unsigned int capacity_margin; extern struct static_key __sched_freq; -- GitLab From bb5fca240365e7e8fd59e2bbca8ed9c834cf7f2f Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Wed, 25 Nov 2015 15:59:25 -0800 Subject: [PATCH 1148/1442] ANDROID: sched/cpufreq_sched: add trace events Trace events will aid in debugging, profiling and tuning. Signed-off-by: Steve Muckle Signed-off-by: Andres Oportus --- include/trace/events/cpufreq_sched.h | 87 ++++++++++++++++++++++++++++ kernel/sched/cpufreq_sched.c | 9 +++ 2 files changed, 96 insertions(+) create mode 100644 include/trace/events/cpufreq_sched.h diff --git a/include/trace/events/cpufreq_sched.h b/include/trace/events/cpufreq_sched.h new file mode 100644 index 000000000000..a46cd088e969 --- /dev/null +++ b/include/trace/events/cpufreq_sched.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2015 Steve Muckle + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cpufreq_sched + +#if !defined(_TRACE_CPUFREQ_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CPUFREQ_SCHED_H + +#include +#include + +TRACE_EVENT(cpufreq_sched_throttled, + TP_PROTO(unsigned int rem), + TP_ARGS(rem), + TP_STRUCT__entry( + __field( unsigned int, rem) + ), + TP_fast_assign( + __entry->rem = rem; + ), + TP_printk("throttled - %d usec remaining", __entry->rem) +); + +TRACE_EVENT(cpufreq_sched_request_opp, + TP_PROTO(int cpu, + unsigned long capacity, + unsigned int freq_new, + unsigned int requested_freq), + TP_ARGS(cpu, capacity, freq_new, requested_freq), + TP_STRUCT__entry( + __field( int, cpu) + __field( unsigned long, capacity) + __field( unsigned int, freq_new) + __field( unsigned int, requested_freq) + ), + TP_fast_assign( + __entry->cpu = cpu; + __entry->capacity = capacity; + __entry->freq_new = freq_new; + __entry->requested_freq = requested_freq; + ), + TP_printk("cpu %d cap change, cluster cap request %ld => OPP %d " + "(cur %d)", + __entry->cpu, __entry->capacity, __entry->freq_new, + __entry->requested_freq) +); + +TRACE_EVENT(cpufreq_sched_update_capacity, + TP_PROTO(int cpu, + bool request, + struct sched_capacity_reqs *scr, + unsigned long new_capacity), + TP_ARGS(cpu, request, scr, new_capacity), + TP_STRUCT__entry( + __field( int, cpu) + __field( bool, request) + __field( unsigned long, cfs) + __field( unsigned long, rt) + __field( unsigned long, dl) + __field( unsigned long, total) + __field( unsigned long, new_total) + ), + TP_fast_assign( + __entry->cpu = cpu; + __entry->request = request; + __entry->cfs = scr->cfs; + __entry->rt = scr->rt; + __entry->dl = scr->dl; + __entry->total = scr->total; + __entry->new_total = new_capacity; + ), + TP_printk("cpu=%d set_cap=%d cfs=%ld rt=%ld dl=%ld old_tot=%ld " + "new_tot=%ld", + __entry->cpu, __entry->request, __entry->cfs, __entry->rt, + __entry->dl, __entry->total, __entry->new_total) +); + +#endif /* _TRACE_CPUFREQ_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index 71b9f14875d4..6d270ae89057 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -14,6 +14,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + #include "sched.h" #define THROTTLE_NSEC 50000000 /* 50ms default */ @@ -78,6 +81,7 @@ static bool finish_last_request(struct gov_data *gd) int usec_left = ktime_to_ns(ktime_sub(gd->throttle, now)); usec_left /= NSEC_PER_USEC; + trace_cpufreq_sched_throttled(usec_left); usleep_range(usec_left, usec_left + 100); now = ktime_get(); if (ktime_after(now, gd->throttle)) @@ -183,6 +187,9 @@ static void update_fdomain_capacity_request(int cpu) index_new = cpufreq_frequency_table_target(policy, freq_new, CPUFREQ_RELATION_L); freq_new = policy->freq_table[index_new].frequency; + trace_cpufreq_sched_request_opp(cpu, capacity, freq_new, + gd->requested_freq); + if (freq_new == gd->requested_freq) goto out; @@ -219,6 +226,8 @@ void update_cpu_capacity_request(int cpu, bool request) if (new_capacity == scr->total) return; + trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity); + scr->total = new_capacity; if (request) update_fdomain_capacity_request(cpu); -- GitLab From 795949dad190d044831079d4455674133063d7fc Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 20 Oct 2015 10:46:26 +0200 Subject: [PATCH 1149/1442] ANDROID: sched: remove call of sched_avg_update from sched_rt_avg_update rt_avg is only used to scale the available CPU's capacity for CFS tasks. As the update of this scaling is done during periodic load balance, we only have to ensure that sched_avg_update has been called before any periodic load balancing. This requirement is already fulfilled by __update_cpu_load so the call in sched_rt_avg_update, which is part of the hotpath, is useless. Signed-off-by: Vincent Guittot Signed-off-by: Steve Muckle Signed-off-by: Andres Oportus --- kernel/sched/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7c59c9b4e7ea..7de37eb85b3c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1643,7 +1643,6 @@ static inline void set_dl_cpu_capacity(int cpu, bool request, static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); - sched_avg_update(rq); } #else static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } -- GitLab From 871a9ff1f941f101867dcb7b374f2de062b8b884 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Fri, 11 Dec 2015 11:55:51 +0000 Subject: [PATCH 1150/1442] ANDROID: fixup! sched: scheduler-driven cpu frequency selection Signed-off-by: Juri Lelli Signed-off-by: Andres Oportus --- kernel/sched/cpufreq_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index 6d270ae89057..2e74f9732a36 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -119,9 +119,9 @@ static int cpufreq_sched_thread(void *data) } do { - set_current_state(TASK_INTERRUPTIBLE); new_request = gd->requested_freq; if (new_request == last_request) { + set_current_state(TASK_INTERRUPTIBLE); schedule(); } else { /* -- GitLab From afe22aa30ab0d936d8916de9dc97b583090157aa Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Tue, 30 Jun 2015 12:03:26 +0100 Subject: [PATCH 1151/1442] ANDROID: sched/tune: add detailed documentation The topic of a single simple power-performance tunable, that is wholly scheduler centric, and has well defined and predictable properties has come up on several occasions in the past. With techniques such as a scheduler driven DVFS, we now have a good framework for implementing such a tunable. This patch provides a detailed description of the motivations and design decisions behind the implementation of the SchedTune. cc: Jonathan Corbet cc: linux-doc@vger.kernel.org Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- Documentation/scheduler/sched-tune.txt | 366 +++++++++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 Documentation/scheduler/sched-tune.txt diff --git a/Documentation/scheduler/sched-tune.txt b/Documentation/scheduler/sched-tune.txt new file mode 100644 index 000000000000..9bd2231c01b1 --- /dev/null +++ b/Documentation/scheduler/sched-tune.txt @@ -0,0 +1,366 @@ + Central, scheduler-driven, power-performance control + (EXPERIMENTAL) + +Abstract +======== + +The topic of a single simple power-performance tunable, that is wholly +scheduler centric, and has well defined and predictable properties has come up +on several occasions in the past [1,2]. With techniques such as a scheduler +driven DVFS [3], we now have a good framework for implementing such a tunable. +This document describes the overall ideas behind its design and implementation. + + +Table of Contents +================= + +1. Motivation +2. Introduction +3. Signal Boosting Strategy +4. OPP selection using boosted CPU utilization +5. Per task group boosting +6. Question and Answers + - What about "auto" mode? + - What about boosting on a congested system? + - How CPUs are boosted when we have tasks with multiple boost values? +7. References + + +1. Motivation +============= + +Sched-DVFS [3] is a new event-driven cpufreq governor which allows the +scheduler to select the optimal DVFS operating point (OPP) for running a task +allocated to a CPU. The introduction of sched-DVFS enables running workloads at +the most energy efficient OPPs. + +However, sometimes it may be desired to intentionally boost the performance of +a workload even if that could imply a reasonable increase in energy +consumption. For example, in order to reduce the response time of a task, we +may want to run the task at a higher OPP than the one that is actually required +by it's CPU bandwidth demand. + +This last requirement is especially important if we consider that one of the +main goals of the sched-DVFS component is to replace all currently available +CPUFreq policies. Since sched-DVFS is event based, as opposed to the sampling +driven governors we currently have, it is already more responsive at selecting +the optimal OPP to run tasks allocated to a CPU. However, just tracking the +actual task load demand may not be enough from a performance standpoint. For +example, it is not possible to get behaviors similar to those provided by the +"performance" and "interactive" CPUFreq governors. + +This document describes an implementation of a tunable, stacked on top of the +sched-DVFS which extends its functionality to support task performance +boosting. + +By "performance boosting" we mean the reduction of the time required to +complete a task activation, i.e. the time elapsed from a task wakeup to its +next deactivation (e.g. because it goes back to sleep or it terminates). For +example, if we consider a simple periodic task which executes the same workload +for 5[s] every 20[s] while running at a certain OPP, a boosted execution of +that task must complete each of its activations in less than 5[s]. + +A previous attempt [5] to introduce such a boosting feature has not been +successful mainly because of the complexity of the proposed solution. The +approach described in this document exposes a single simple interface to +user-space. This single tunable knob allows the tuning of system wide +scheduler behaviours ranging from energy efficiency at one end through to +incremental performance boosting at the other end. This first tunable affects +all tasks. However, a more advanced extension of the concept is also provided +which uses CGroups to boost the performance of only selected tasks while using +the energy efficient default for all others. + +The rest of this document introduces in more details the proposed solution +which has been named SchedTune. + + +2. Introduction +=============== + +SchedTune exposes a simple user-space interface with a single power-performance +tunable: + + /proc/sys/kernel/sched_cfs_boost + +This permits expressing a boost value as an integer in the range [0..100]. + +A value of 0 (default) configures the CFS scheduler for maximum energy +efficiency. This means that sched-DVFS runs the tasks at the minimum OPP +required to satisfy their workload demand. +A value of 100 configures scheduler for maximum performance, which translates +to the selection of the maximum OPP on that CPU. + +The range between 0 and 100 can be set to satisfy other scenarios suitably. For +example to satisfy interactive response or depending on other system events +(battery level etc). + +A CGroup based extension is also provided, which permits further user-space +defined task classification to tune the scheduler for different goals depending +on the specific nature of the task, e.g. background vs interactive vs +low-priority. + +The overall design of the SchedTune module is built on top of "Per-Entity Load +Tracking" (PELT) signals and sched-DVFS by introducing a bias on the Operating +Performance Point (OPP) selection. +Each time a task is allocated on a CPU, sched-DVFS has the opportunity to tune +the operating frequency of that CPU to better match the workload demand. The +selection of the actual OPP being activated is influenced by the global boost +value, or the boost value for the task CGroup when in use. + +This simple biasing approach leverages existing frameworks, which means minimal +modifications to the scheduler, and yet it allows to achieve a range of +different behaviours all from a single simple tunable knob. +The only new concept introduced is that of signal boosting. + + +3. Signal Boosting Strategy +=========================== + +The whole PELT machinery works based on the value of a few load tracking signals +which basically track the CPU bandwidth requirements for tasks and the capacity +of CPUs. The basic idea behind the SchedTune knob is to artificially inflate +some of these load tracking signals to make a task or RQ appears more demanding +that it actually is. + +Which signals have to be inflated depends on the specific "consumer". However, +independently from the specific (signal, consumer) pair, it is important to +define a simple and possibly consistent strategy for the concept of boosting a +signal. + +A boosting strategy defines how the "abstract" user-space defined +sched_cfs_boost value is translated into an internal "margin" value to be added +to a signal to get its inflated value: + + margin := boosting_strategy(sched_cfs_boost, signal) + boosted_signal := signal + margin + +Different boosting strategies were identified and analyzed before selecting the +one found to be most effective. + +Signal Proportional Compensation (SPC) +-------------------------------------- + +In this boosting strategy the sched_cfs_boost value is used to compute a +margin which is proportional to the complement of the original signal. +When a signal has a maximum possible value, its complement is defined as +the delta from the actual value and its possible maximum. + +Since the tunable implementation uses signals which have SCHED_LOAD_SCALE as +the maximum possible value, the margin becomes: + + margin := sched_cfs_boost * (SCHED_LOAD_SCALE - signal) + +Using this boosting strategy: +- a 100% sched_cfs_boost means that the signal is scaled to the maximum value +- each value in the range of sched_cfs_boost effectively inflates the signal in + question by a quantity which is proportional to the maximum value. + +For example, by applying the SPC boosting strategy to the selection of the OPP +to run a task it is possible to achieve these behaviors: + +- 0% boosting: run the task at the minimum OPP required by its workload +- 100% boosting: run the task at the maximum OPP available for the CPU +- 50% boosting: run at the half-way OPP between minimum and maximum + +Which means that, at 50% boosting, a task will be scheduled to run at half of +the maximum theoretically achievable performance on the specific target +platform. + +A graphical representation of an SPC boosted signal is represented in the +following figure where: + a) "-" represents the original signal + b) "b" represents a 50% boosted signal + c) "p" represents a 100% boosted signal + + + ^ + | SCHED_LOAD_SCALE + +-----------------------------------------------------------------+ + |pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp + | + | boosted_signal + | bbbbbbbbbbbbbbbbbbbbbbbb + | + | original signal + | bbbbbbbbbbbbbbbbbbbbbbbb+----------------------+ + | | + |bbbbbbbbbbbbbbbbbb | + | | + | | + | | + | +-----------------------+ + | | + | | + | | + |------------------+ + | + | + +-----------------------------------------------------------------------> + +The plot above shows a ramped load signal (titled 'original_signal') and it's +boosted equivalent. For each step of the original signal the boosted signal +corresponding to a 50% boost is midway from the original signal and the upper +bound. Boosting by 100% generates a boosted signal which is always saturated to +the upper bound. + + +4. OPP selection using boosted CPU utilization +============================================== + +It is worth calling out that the implementation does not introduce any new load +signals. Instead, it provides an API to tune existing signals. This tuning is +done on demand and only in scheduler code paths where it is sensible to do so. +The new API calls are defined to return either the default signal or a boosted +one, depending on the value of sched_cfs_boost. This is a clean an non invasive +modification of the existing existing code paths. + +The signal representing a CPU's utilization is boosted according to the +previously described SPC boosting strategy. To sched-DVFS, this allows a CPU +(ie CFS run-queue) to appear more used then it actually is. + +Thus, with the sched_cfs_boost enabled we have the following main functions to +get the current utilization of a CPU: + + cpu_util() + boosted_cpu_util() + +The new boosted_cpu_util() is similar to the first but returns a boosted +utilization signal which is a function of the sched_cfs_boost value. + +This function is used in the CFS scheduler code paths where sched-DVFS needs to +decide the OPP to run a CPU at. +For example, this allows selecting the highest OPP for a CPU which has +the boost value set to 100%. + + +5. Per task group boosting +========================== + +The availability of a single knob which is used to boost all tasks in the +system is certainly a simple solution but it quite likely doesn't fit many +utilization scenarios, especially in the mobile device space. + +For example, on battery powered devices there usually are many background +services which are long running and need energy efficient scheduling. On the +other hand, some applications are more performance sensitive and require an +interactive response and/or maximum performance, regardless of the energy cost. +To better service such scenarios, the SchedTune implementation has an extension +that provides a more fine grained boosting interface. + +A new CGroup controller, namely "schedtune", could be enabled which allows to +defined and configure task groups with different boosting values. +Tasks that require special performance can be put into separate CGroups. +The value of the boost associated with the tasks in this group can be specified +using a single knob exposed by the CGroup controller: + + schedtune.boost + +This knob allows the definition of a boost value that is to be used for +SPC boosting of all tasks attached to this group. + +The current schedtune controller implementation is really simple and has these +main characteristics: + + 1) It is only possible to create 1 level depth hierarchies + + The root control groups define the system-wide boost value to be applied + by default to all tasks. Its direct subgroups are named "boost groups" and + they define the boost value for specific set of tasks. + Further nested subgroups are not allowed since they do not have a sensible + meaning from a user-space standpoint. + + 2) It is possible to define only a limited number of "boost groups" + + This number is defined at compile time and by default configured to 16. + This is a design decision motivated by two main reasons: + a) In a real system we do not expect utilization scenarios with more then few + boost groups. For example, a reasonable collection of groups could be + just "background", "interactive" and "performance". + b) It simplifies the implementation considerably, especially for the code + which has to compute the per CPU boosting once there are multiple + RUNNABLE tasks with different boost values. + +Such a simple design should allow servicing the main utilization scenarios identified +so far. It provides a simple interface which can be used to manage the +power-performance of all tasks or only selected tasks. +Moreover, this interface can be easily integrated by user-space run-times (e.g. +Android, ChromeOS) to implement a QoS solution for task boosting based on tasks +classification, which has been a long standing requirement. + +Setup and usage +--------------- + +0. Use a kernel with CGROUP_SCHEDTUNE support enabled + +1. Check that the "schedtune" CGroup controller is available: + + root@linaro-nano:~# cat /proc/cgroups + #subsys_name hierarchy num_cgroups enabled + cpuset 0 1 1 + cpu 0 1 1 + schedtune 0 1 1 + +2. Mount a tmpfs to create the CGroups mount point (Optional) + + root@linaro-nano:~# sudo mount -t tmpfs cgroups /sys/fs/cgroup + +3. Mount the "schedtune" controller + + root@linaro-nano:~# mkdir /sys/fs/cgroup/stune + root@linaro-nano:~# sudo mount -t cgroup -o schedtune stune /sys/fs/cgroup/stune + +4. Setup the system-wide boost value (Optional) + + If not configured the root control group has a 0% boost value, which + basically disables boosting for all tasks in the system thus running in + an energy-efficient mode. + + root@linaro-nano:~# echo $SYSBOOST > /sys/fs/cgroup/stune/schedtune.boost + +5. Create task groups and configure their specific boost value (Optional) + + For example here we create a "performance" boost group configure to boost + all its tasks to 100% + + root@linaro-nano:~# mkdir /sys/fs/cgroup/stune/performance + root@linaro-nano:~# echo 100 > /sys/fs/cgroup/stune/performance/schedtune.boost + +6. Move tasks into the boost group + + For example, the following moves the tasks with PID $TASKPID (and all its + threads) into the "performance" boost group. + + root@linaro-nano:~# echo "TASKPID > /sys/fs/cgroup/stune/performance/cgroup.procs + +This simple configuration allows only the threads of the $TASKPID task to run, +when needed, at the highest OPP in the most capable CPU of the system. + + +6. Question and Answers +======================= + +What about "auto" mode? +----------------------- + +The 'auto' mode as described in [5] can be implemented by interfacing SchedTune +with some suitable user-space element. This element could use the exposed +system-wide or cgroup based interface. + +How are multiple groups of tasks with different boost values managed? +--------------------------------------------------------------------- + +The current SchedTune implementation keeps track of the boosted RUNNABLE tasks +on a CPU. Once sched-DVFS selects the OPP to run a CPU at, the CPU utilization +is boosted with a value which is the maximum of the boost values of the +currently RUNNABLE tasks in its RQ. + +This allows sched-DVFS to boost a CPU only while there are boosted tasks ready +to run and switch back to the energy efficient mode as soon as the last boosted +task is dequeued. + + +7. References +============= +[1] http://lwn.net/Articles/552889 +[2] http://lkml.org/lkml/2012/5/18/91 +[3] http://lkml.org/lkml/2015/6/26/620 -- GitLab From 69fa4c768a73c25b9b3f26d074295c53f839a57c Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Mon, 22 Jun 2015 18:11:44 +0100 Subject: [PATCH 1152/1442] ANDROID: sched/tune: add sysctl interface to define a boost value The current (CFS) scheduler implementation does not allow "to boost" tasks performance by running them at a higher OPP compared to the minimum required to meet their workload demands. To support tasks performance boosting the scheduler should provide a "knob" which allows to tune how much the system is going to be optimised for energy efficiency vs performance. This patch is the first of a series which provides a simple interface to define a tuning knob. One system-wide "boost" tunable is exposed via: /proc/sys/kernel/sched_cfs_boost which can be configured in the range [0..100], to define a percentage where: - 0% boost requires to operate in "standard" mode by scheduling tasks at the minimum capacities required by the workload demand - 100% boost requires to push at maximum the task performances, "regardless" of the incurred energy consumption A boost value in between these two boundaries is used to bias the power/performance trade-off, the higher the boost value the more the scheduler is biased toward performance boosting instead of energy efficiency. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/linux/sched/sysctl.h | 16 ++++++++++++++++ init/Kconfig | 26 ++++++++++++++++++++++++++ kernel/sched/Makefile | 1 + kernel/sched/tune.c | 16 ++++++++++++++++ kernel/sysctl.c | 11 +++++++++++ 5 files changed, 70 insertions(+) create mode 100644 kernel/sched/tune.c diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 22db1e63707e..2d2cf5daca63 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -56,6 +56,22 @@ extern int sysctl_sched_rt_runtime; extern unsigned int sysctl_sched_cfs_bandwidth_slice; #endif +#ifdef CONFIG_SCHED_TUNE +extern unsigned int sysctl_sched_cfs_boost; +int sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, + loff_t *ppos); +static inline unsigned int get_sysctl_sched_cfs_boost(void) +{ + return sysctl_sched_cfs_boost; +} +#else +static inline unsigned int get_sysctl_sched_cfs_boost(void) +{ + return 0; +} +#endif + #ifdef CONFIG_SCHED_AUTOGROUP extern unsigned int sysctl_sched_autogroup_enabled; #endif diff --git a/init/Kconfig b/init/Kconfig index 34407f15e6d3..cc0e213f5d7d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1248,6 +1248,32 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. +config SCHED_TUNE + bool "Boosting for CFS tasks (EXPERIMENTAL)" + help + This option enables the system-wide support for task boosting. + When this support is enabled a new sysctl interface is exposed to + userspace via: + /proc/sys/kernel/sched_cfs_boost + which allows to set a system-wide boost value in range [0..100]. + + The currently boosting strategy is implemented in such a way that: + - a 0% boost value requires to operate in "standard" mode by + scheduling all tasks at the minimum capacities required by their + workload demand + - a 100% boost value requires to push at maximum the task + performances, "regardless" of the incurred energy consumption + + A boost value in between these two boundaries is used to bias the + power/performance trade-off, the higher the boost value the more the + scheduler is biased toward performance boosting instead of energy + efficiency. + + Since this support exposes a single system-wide knob, the specified + boost value is applied to all (CFS) tasks in the system. + + If unsure, say N. + config SYSFS_DEPRECATED bool "Enable deprecated sysfs features to support old userspace tools" depends on SYSFS diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 5aa8742f49b5..86fd8b61ffd1 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o +obj-$(CONFIG_SCHED_TUNE) += tune.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c new file mode 100644 index 000000000000..a93af9c2f267 --- /dev/null +++ b/kernel/sched/tune.c @@ -0,0 +1,16 @@ +#include "sched.h" + +unsigned int sysctl_sched_cfs_boost __read_mostly; + +int +sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (ret || !write) + return ret; + + return 0; +} diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 706309f9ed84..0b00522bbf4b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -449,6 +449,17 @@ static struct ctl_table kern_table[] = { .extra1 = &one, }, #endif +#ifdef CONFIG_SCHED_TUNE + { + .procname = "sched_cfs_boost", + .data = &sysctl_sched_cfs_boost, + .maxlen = sizeof(sysctl_sched_cfs_boost), + .mode = 0644, + .proc_handler = &sysctl_sched_cfs_boost_handler, + .extra1 = &zero, + .extra2 = &one_hundred, + }, +#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", -- GitLab From b08685bc910093abd11eabe1e9734d0fe2c05271 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Mon, 22 Jun 2015 18:32:36 +0100 Subject: [PATCH 1153/1442] ANDROID: sched/fair: add function to convert boost value into "margin" The basic idea of the boost knob is to "artificially inflate" a signal to make a task or logical CPU appears more demanding than it actually is. Independently from the specific signal, a consistent and possibly simple semantic for the concept of "signal boosting" must define: 1. how we translate the boost percentage into a "margin" value to be added to the original signal to inflate 2. what is the meaning of a boost value from a user-space perspective This patch provides the implementation of a possible boost semantic, named "Signal Proportional Compensation" (SPC), where the boost percentage (BP) is used to compute a margin (M) which is proportional to the complement of the original signal (OS): M = BP * (SCHED_LOAD_SCALE - OS) The computed margin then added to the OS to obtain the Boosted Signal (BS) BS = OS + M The proposed boost semantic has these main features: - each signal gets a boost which is proportional to its delta with respect to the maximum available capacity in the system (i.e. SCHED_LOAD_SCALE) - a 100% boosting has a clear understanding from a user-space perspective, since it means simply to run (possibly) "all" tasks at the max OPP - each boosting value means to improve the task performance by a quantity which is proportional to the maximum achievable performance on that system Thus this semantics is somehow forcing a behaviour which is: 50% boosting means to run at half-way between the current and the maximum performance which a task could achieve on that system This patch provides the code to implement a fast integer division to convert a boost percentage (BP) value into a margin (M). NOTE: this code is suitable for all signals operating in range [0..SCHED_LOAD_SCALE] cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4b4f5fcd7180..7df5da608044 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5533,6 +5533,44 @@ static bool cpu_overutilized(int cpu) return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); } +#ifdef CONFIG_SCHED_TUNE + +static unsigned long +schedtune_margin(unsigned long signal, unsigned long boost) +{ + unsigned long long margin = 0; + + /* + * Signal proportional compensation (SPC) + * + * The Boost (B) value is used to compute a Margin (M) which is + * proportional to the complement of the original Signal (S): + * M = B * (SCHED_LOAD_SCALE - S) + * The obtained M could be used by the caller to "boost" S. + */ + margin = SCHED_CAPACITY_SCALE - signal; + margin *= boost; + + /* + * Fast integer division by constant: + * Constant : (C) = 100 + * Precision : 0.1% (P) = 0.1 + * Reference : C * 100 / P (R) = 100000 + * + * Thus: + * Shift bits : ceil(log(R,2)) (S) = 17 + * Mult const : round(2^S/C) (M) = 1311 + * + * + */ + margin *= 1311; + margin >>= 17; + + return margin; +} + +#endif /* CONFIG_SCHED_TUNE */ + /* * find_idlest_group finds and returns the least busy CPU group within the * domain. -- GitLab From caa24e4c914c745d2135f94f56c41a46a2dd8f9e Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 26 Jun 2015 09:55:06 +0100 Subject: [PATCH 1154/1442] ANDROID: sched/fair: add boosted CPU usage The CPU usage signal is used by the scheduler as an estimation of the overall bandwidth currently allocated on a CPU. When SchedDVFS is in use, this signal affects the selection of the operating points (OPP) required to accommodate all the workload allocated in a CPU. A convenient way to boost the performance of tasks running on a CPU, which is also little intrusive, is to boost the CPU usage signal each time it is used to select an OPP. This patch introduces a new function: get_boosted_cpu_usage(cpu) to return a boosted value for the usage of a specified CPU. The margin added to the original usage is: 1. computed based on the "boosting strategy" in use 2. proportional to the system-wide boost value defined by provided user-space interface The boosted signal is used by SchedDVFS (transparently) each time it requires to get an estimation of the capacity required for a CPU. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7df5da608044..b78a8d99d10e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4530,6 +4530,7 @@ static inline void hrtick_update(struct rq *rq) static unsigned long capacity_orig_of(int cpu); static unsigned long cpu_util(int cpu); +static inline unsigned long boosted_cpu_util(int cpu); static void update_capacity_of(int cpu) { @@ -4539,7 +4540,8 @@ static void update_capacity_of(int cpu) return; /* Convert scale-invariant capacity to cpu. */ - req_cap = cpu_util(cpu) * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); + req_cap = boosted_cpu_util(cpu); + req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); set_cfs_cpu_capacity(cpu, true, req_cap); } @@ -5569,8 +5571,36 @@ schedtune_margin(unsigned long signal, unsigned long boost) return margin; } +static inline unsigned int +schedtune_cpu_margin(unsigned long util) +{ + unsigned int boost = get_sysctl_sched_cfs_boost(); + + if (boost == 0) + return 0; + + return schedtune_margin(util, boost); +} + +#else /* CONFIG_SCHED_TUNE */ + +static inline unsigned int +schedtune_cpu_margin(unsigned long util) +{ + return 0; +} + #endif /* CONFIG_SCHED_TUNE */ +static inline unsigned long +boosted_cpu_util(int cpu) +{ + unsigned long util = cpu_util(cpu); + unsigned long margin = schedtune_cpu_margin(util); + + return util + margin; +} + /* * find_idlest_group finds and returns the least busy CPU group within the * domain. -- GitLab From ae71030fd5563e029cf6d11926055cb55d64512a Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Tue, 23 Jun 2015 09:17:54 +0100 Subject: [PATCH 1155/1442] ANDROID: sched/tune: add initial support for CGroups based boosting To support task performance boosting, the usage of a single knob has the advantage to be a simple solution, both from the implementation and the usability standpoint. However, on a real system it can be difficult to identify a single value for the knob which fits the needs of multiple different tasks. For example, some kernel threads and/or user-space background services should be better managed the "standard" way while we still want to be able to boost the performance of specific workloads. In order to improve the flexibility of the task boosting mechanism this patch is the first of a small series which extends the previous implementation to introduce a "per task group" support. This first patch introduces just the basic CGroups support, a new "schedtune" CGroups controller is added which allows to configure different boost value for different groups of tasks. To keep the implementation simple but still effective for a boosting strategy, the new controller: 1. allows only a two layer hierarchy 2. supports only a limited number of boost groups A two layer hierarchy allows to place each task either: a) in the root control group thus being subject to a system-wide boosting value b) in a child of the root group thus being subject to the specific boost value defined by that "boost group" The limited number of "boost groups" supported is mainly motivated by the observation that in a real system it could be useful to have only few classes of tasks which deserve different treatment. For example, background vs foreground or interactive vs low-priority. As an additional benefit, a limited number of boost groups allows also to have a simpler implementation especially for the code required to compute the boost value for CPUs which have runnable tasks belonging to different boost groups. cc: Tejun Heo cc: Li Zefan cc: Johannes Weiner cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/linux/cgroup_subsys.h | 4 + init/Kconfig | 76 ++++++++++++ kernel/sched/tune.c | 223 ++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 4 + 4 files changed, 307 insertions(+) diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 0df0336acee9..7f4a2a5a2a77 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -20,6 +20,10 @@ SUBSYS(cpu) SUBSYS(cpuacct) #endif +#if IS_ENABLED(CONFIG_CGROUP_SCHEDTUNE) +SUBSYS(schedtune) +#endif + #if IS_ENABLED(CONFIG_BLK_CGROUP) SUBSYS(io) #endif diff --git a/init/Kconfig b/init/Kconfig index cc0e213f5d7d..e8c2989848b0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -971,6 +971,82 @@ menuconfig CGROUPS if CGROUPS +config CGROUP_DEBUG + bool "Example debug cgroup subsystem" + default n + help + This option enables a simple cgroup subsystem that + exports useful debugging information about the cgroups + framework. + + Say N if unsure. + +config CGROUP_FREEZER + bool "Freezer cgroup subsystem" + help + Provides a way to freeze and unfreeze all tasks in a + cgroup. + +config CGROUP_PIDS + bool "PIDs cgroup subsystem" + help + Provides enforcement of process number limits in the scope of a + cgroup. Any attempt to fork more processes than is allowed in the + cgroup will fail. PIDs are fundamentally a global resource because it + is fairly trivial to reach PID exhaustion before you reach even a + conservative kmemcg limit. As a result, it is possible to grind a + system to halt without being limited by other cgroup policies. The + PIDs cgroup subsystem is designed to stop this from happening. + + It should be noted that organisational operations (such as attaching + to a cgroup hierarchy will *not* be blocked by the PIDs subsystem), + since the PIDs limit only affects a process's ability to fork, not to + attach to a cgroup. + +config CGROUP_DEVICE + bool "Device controller for cgroups" + help + Provides a cgroup implementing whitelists for devices which + a process in the cgroup can mknod or open. + +config CPUSETS + bool "Cpuset support" + help + This option will let you create and manage CPUSETs which + allow dynamically partitioning a system into sets of CPUs and + Memory Nodes and assigning tasks to run only within those sets. + This is primarily useful on large SMP or NUMA systems. + + Say N if unsure. + +config PROC_PID_CPUSET + bool "Include legacy /proc//cpuset file" + depends on CPUSETS + default y + +config CGROUP_CPUACCT + bool "Simple CPU accounting cgroup subsystem" + help + Provides a simple Resource Controller for monitoring the + total CPU consumed by the tasks in a cgroup. + +config CGROUP_SCHEDTUNE + bool "CFS tasks boosting cgroup subsystem (EXPERIMENTAL)" + depends on SCHED_TUNE + help + This option provides the "schedtune" controller which improves the + flexibility of the task boosting mechanism by introducing the support + to define "per task" boost values. + + This new controller: + 1. allows only a two layers hierarchy, where the root defines the + system-wide boost value and its direct childrens define each one a + different "class of tasks" to be boosted with a different value + 2. supports up to 16 different task classes, each one which could be + configured with a different boost value + + Say N if unsure. + config PAGE_COUNTER bool diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index a93af9c2f267..95bc8b87c6d4 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -1,7 +1,230 @@ +#include +#include +#include +#include +#include + #include "sched.h" unsigned int sysctl_sched_cfs_boost __read_mostly; +#ifdef CONFIG_CGROUP_SCHEDTUNE + +/* + * EAS scheduler tunables for task groups. + */ + +/* SchdTune tunables for a group of tasks */ +struct schedtune { + /* SchedTune CGroup subsystem */ + struct cgroup_subsys_state css; + + /* Boost group allocated ID */ + int idx; + + /* Boost value for tasks on that SchedTune CGroup */ + int boost; + +}; + +static inline struct schedtune *css_st(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct schedtune, css) : NULL; +} + +static inline struct schedtune *task_schedtune(struct task_struct *tsk) +{ + return css_st(task_css(tsk, schedtune_cgrp_id)); +} + +static inline struct schedtune *parent_st(struct schedtune *st) +{ + return css_st(st->css.parent); +} + +/* + * SchedTune root control group + * The root control group is used to defined a system-wide boosting tuning, + * which is applied to all tasks in the system. + * Task specific boost tuning could be specified by creating and + * configuring a child control group under the root one. + * By default, system-wide boosting is disabled, i.e. no boosting is applied + * to tasks which are not into a child control group. + */ +static struct schedtune +root_schedtune = { + .boost = 0, +}; + +/* + * Maximum number of boost groups to support + * When per-task boosting is used we still allow only limited number of + * boost groups for two main reasons: + * 1. on a real system we usually have only few classes of workloads which + * make sense to boost with different values (e.g. background vs foreground + * tasks, interactive vs low-priority tasks) + * 2. a limited number allows for a simpler and more memory/time efficient + * implementation especially for the computation of the per-CPU boost + * value + */ +#define BOOSTGROUPS_COUNT 4 + +/* Array of configured boostgroups */ +static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = { + &root_schedtune, + NULL, +}; + +/* SchedTune boost groups + * Keep track of all the boost groups which impact on CPU, for example when a + * CPU has two RUNNABLE tasks belonging to two different boost groups and thus + * likely with different boost values. + * Since on each system we expect only a limited number of boost groups, here + * we use a simple array to keep track of the metrics required to compute the + * maximum per-CPU boosting value. + */ +struct boost_groups { + /* Maximum boost value for all RUNNABLE tasks on a CPU */ + unsigned boost_max; + struct { + /* The boost for tasks on that boost group */ + unsigned boost; + /* Count of RUNNABLE tasks on that boost group */ + unsigned tasks; + } group[BOOSTGROUPS_COUNT]; +}; + +/* Boost groups affecting each CPU in the system */ +DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups); + +static u64 +boost_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->boost; +} + +static int +boost_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 boost) +{ + struct schedtune *st = css_st(css); + + if (boost < 0 || boost > 100) + return -EINVAL; + + st->boost = boost; + if (css == &root_schedtune.css) + sysctl_sched_cfs_boost = boost; + + return 0; +} + +static struct cftype files[] = { + { + .name = "boost", + .read_u64 = boost_read, + .write_u64 = boost_write, + }, + { } /* terminate */ +}; + +static int +schedtune_boostgroup_init(struct schedtune *st) +{ + /* Keep track of allocated boost groups */ + allocated_group[st->idx] = st; + + return 0; +} + +static int +schedtune_init(void) +{ + struct boost_groups *bg; + int cpu; + + /* Initialize the per CPU boost groups */ + for_each_possible_cpu(cpu) { + bg = &per_cpu(cpu_boost_groups, cpu); + memset(bg, 0, sizeof(struct boost_groups)); + } + + pr_info(" schedtune configured to support %d boost groups\n", + BOOSTGROUPS_COUNT); + return 0; +} + +static struct cgroup_subsys_state * +schedtune_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct schedtune *st; + int idx; + + if (!parent_css) { + schedtune_init(); + return &root_schedtune.css; + } + + /* Allow only single level hierachies */ + if (parent_css != &root_schedtune.css) { + pr_err("Nested SchedTune boosting groups not allowed\n"); + return ERR_PTR(-ENOMEM); + } + + /* Allow only a limited number of boosting groups */ + for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) + if (!allocated_group[idx]) + break; + if (idx == BOOSTGROUPS_COUNT) { + pr_err("Trying to create more than %d SchedTune boosting groups\n", + BOOSTGROUPS_COUNT); + return ERR_PTR(-ENOSPC); + } + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto out; + + /* Initialize per CPUs boost group support */ + st->idx = idx; + if (schedtune_boostgroup_init(st)) + goto release; + + return &st->css; + +release: + kfree(st); +out: + return ERR_PTR(-ENOMEM); +} + +static void +schedtune_boostgroup_release(struct schedtune *st) +{ + /* Keep track of allocated boost groups */ + allocated_group[st->idx] = NULL; +} + +static void +schedtune_css_free(struct cgroup_subsys_state *css) +{ + struct schedtune *st = css_st(css); + + schedtune_boostgroup_release(st); + kfree(st); +} + +struct cgroup_subsys schedtune_cgrp_subsys = { + .css_alloc = schedtune_css_alloc, + .css_free = schedtune_css_free, + .legacy_cftypes = files, + .early_init = 1, +}; + +#endif /* CONFIG_CGROUP_SCHEDTUNE */ + int sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0b00522bbf4b..bcc012aad7bb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -454,7 +454,11 @@ static struct ctl_table kern_table[] = { .procname = "sched_cfs_boost", .data = &sysctl_sched_cfs_boost, .maxlen = sizeof(sysctl_sched_cfs_boost), +#ifdef CONFIG_CGROUP_SCHEDTUNE + .mode = 0444, +#else .mode = 0644, +#endif .proc_handler = &sysctl_sched_cfs_boost_handler, .extra1 = &zero, .extra2 = &one_hundred, -- GitLab From 9a871ed439f6c115ff5488e5e8287c39a7b5abc6 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 14 Jan 2016 12:31:35 +0000 Subject: [PATCH 1156/1442] ANDROID: sched/tune: compute and keep track of per CPU boost value When per task boosting is enabled, we could have multiple RUNNABLE tasks which are concurrently scheduled on the same CPU but each one with a different boost value. For example, we could have a scenarios like this: Task SchedTune CGroup Boost Value T1 root 0 T2 low-priority 10 T3 interactive 90 In these conditions we expect a CPU to be configured according to a proper "aggregation" of the required boost values for all the tasks currently scheduled on this CPU. A suitable aggregation function is the one which tracks the MAX boost value for all the tasks RUNNABLE on a CPU. This approach allows to always satisfy the most boost demanding task while at the same time: a) boosting all the concurrently scheduled tasks thus reducing potential co-scheduling side-effects on demanding tasks b) reduce the number of frequency switch requested towards SchedDVFS, thus being more friendly to architectures with slow frequency switching times Every time a task enters/exits the RQ of a CPU the max boost value should be updated considering all the boost groups currently "affecting" that CPU, i.e. which have at least one RUNNABLE task currently allocated on that CPU. This patch introduces the required support to keep track of the boost groups currently affecting CPUs. Thanks to the limited number of boost groups, a small and memory efficient per-cpu array of boost groups values (cpu_boost_groups) is used which is updated for each CPU entry by schedtune_boostgroup_update() but only when a schedtune CGroup boost value is updated. However, this is expected to be a rare operation, perhaps done just one time at system boot time. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 77 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 95bc8b87c6d4..f62386893725 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -97,6 +97,67 @@ struct boost_groups { /* Boost groups affecting each CPU in the system */ DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups); +static void +schedtune_cpu_update(int cpu) +{ + struct boost_groups *bg; + unsigned boost_max; + int idx; + + bg = &per_cpu(cpu_boost_groups, cpu); + + /* The root boost group is always active */ + boost_max = bg->group[0].boost; + for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) { + /* + * A boost group affects a CPU only if it has + * RUNNABLE tasks on that CPU + */ + if (bg->group[idx].tasks == 0) + continue; + boost_max = max(boost_max, bg->group[idx].boost); + } + + bg->boost_max = boost_max; +} + +static int +schedtune_boostgroup_update(int idx, int boost) +{ + struct boost_groups *bg; + int cur_boost_max; + int old_boost; + int cpu; + + /* Update per CPU boost groups */ + for_each_possible_cpu(cpu) { + bg = &per_cpu(cpu_boost_groups, cpu); + + /* + * Keep track of current boost values to compute the per CPU + * maximum only when it has been affected by the new value of + * the updated boost group + */ + cur_boost_max = bg->boost_max; + old_boost = bg->group[idx].boost; + + /* Update the boost value of this boost group */ + bg->group[idx].boost = boost; + + /* Check if this update increase current max */ + if (boost > cur_boost_max && bg->group[idx].tasks) { + bg->boost_max = boost; + continue; + } + + /* Check if this update has decreased current max */ + if (cur_boost_max == old_boost && old_boost > boost) + schedtune_cpu_update(cpu); + } + + return 0; +} + static u64 boost_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -118,6 +179,9 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, if (css == &root_schedtune.css) sysctl_sched_cfs_boost = boost; + /* Update CPU boost */ + schedtune_boostgroup_update(st->idx, st->boost); + return 0; } @@ -133,9 +197,19 @@ static struct cftype files[] = { static int schedtune_boostgroup_init(struct schedtune *st) { + struct boost_groups *bg; + int cpu; + /* Keep track of allocated boost groups */ allocated_group[st->idx] = st; + /* Initialize the per CPU boost groups */ + for_each_possible_cpu(cpu) { + bg = &per_cpu(cpu_boost_groups, cpu); + bg->group[st->idx].boost = 0; + bg->group[st->idx].tasks = 0; + } + return 0; } @@ -203,6 +277,9 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css) static void schedtune_boostgroup_release(struct schedtune *st) { + /* Reset this boost group */ + schedtune_boostgroup_update(st->idx, 0); + /* Keep track of allocated boost groups */ allocated_group[st->idx] = NULL; } -- GitLab From edd28d31f51fb089dda062ba4cec2c2845b14826 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Tue, 7 Jul 2015 15:33:20 +0100 Subject: [PATCH 1157/1442] ANDROID: sched/{fair,tune}: track RUNNABLE tasks impact on per CPU boost value When per-task boosting is enabled, every time a task enters/exits a CPU its boost value could impact the currently selected OPP for that CPU. Thus, the "aggregated" boost value for that CPU potentially needs to be updated to match the current maximum boost value among all the tasks currently RUNNABLE on that CPU. This patch introduces the required support to keep track of which boost groups are impacting a CPU. Each time a task is enqueued/dequeued to/from a CPU its boost group is used to increment a per-cpu counter of RUNNABLE tasks on that CPU. Only when the number of runnable tasks for a specific boost group becomes 1 or 0 the corresponding boost group changes its effects on that CPU, specifically: a) boost_group::tasks == 1: this boost group starts to impact the CPU b) boost_group::tasks == 0: this boost group stops to impact the CPU In each of these two conditions the aggregation function: sched_cpu_update(cpu) could be required to run in order to identify the new maximum boost value required for the CPU. The proposed patch minimizes the number of times the aggregation function is executed while still providing the required support to always boost a CPU to the maximum boost value required by all its currently RUNNABLE tasks. cc: Ingo Molnar cc: Peter Zijlstra Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 17 +++++++--- kernel/sched/tune.c | 82 +++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/tune.h | 23 +++++++++++++ 3 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 kernel/sched/tune.h diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b78a8d99d10e..37f9ca2b349d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -34,6 +34,7 @@ #include #include "sched.h" +#include "tune.h" /* * Targeted preemption latency for CPU-bound tasks: @@ -4604,6 +4605,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cpu_overutilized(rq->cpu)) rq->rd->overutilized = true; + schedtune_enqueue_task(p, cpu_of(rq)); + /* * We want to potentially trigger a freq switch * request only for tasks that are waking up; this is @@ -4672,6 +4675,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!se) { sub_nr_running(rq, 1); + schedtune_dequeue_task(p, cpu_of(rq)); /* * We want to potentially trigger a freq switch @@ -5572,10 +5576,15 @@ schedtune_margin(unsigned long signal, unsigned long boost) } static inline unsigned int -schedtune_cpu_margin(unsigned long util) +schedtune_cpu_margin(unsigned long util, int cpu) { - unsigned int boost = get_sysctl_sched_cfs_boost(); + unsigned int boost; +#ifdef CONFIG_CGROUP_SCHEDTUNE + boost = schedtune_cpu_boost(cpu); +#else + boost = get_sysctl_sched_cfs_boost(); +#endif if (boost == 0) return 0; @@ -5585,7 +5594,7 @@ schedtune_cpu_margin(unsigned long util) #else /* CONFIG_SCHED_TUNE */ static inline unsigned int -schedtune_cpu_margin(unsigned long util) +schedtune_cpu_margin(unsigned long util, int cpu) { return 0; } @@ -5596,7 +5605,7 @@ static inline unsigned long boosted_cpu_util(int cpu) { unsigned long util = cpu_util(cpu); - unsigned long margin = schedtune_cpu_margin(util); + unsigned long margin = schedtune_cpu_margin(util, cpu); return util + margin; } diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index f62386893725..540b945a01ce 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "sched.h" @@ -158,6 +159,87 @@ schedtune_boostgroup_update(int idx, int boost) return 0; } +static inline void +schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count) +{ + struct boost_groups *bg; + int tasks; + + bg = &per_cpu(cpu_boost_groups, cpu); + + /* Update boosted tasks count while avoiding to make it negative */ + if (task_count < 0 && bg->group[idx].tasks <= -task_count) + bg->group[idx].tasks = 0; + else + bg->group[idx].tasks += task_count; + + /* Boost group activation or deactivation on that RQ */ + tasks = bg->group[idx].tasks; + if (tasks == 1 || tasks == 0) + schedtune_cpu_update(cpu); +} + +/* + * NOTE: This function must be called while holding the lock on the CPU RQ + */ +void schedtune_enqueue_task(struct task_struct *p, int cpu) +{ + struct schedtune *st; + int idx; + + /* + * When a task is marked PF_EXITING by do_exit() it's going to be + * dequeued and enqueued multiple times in the exit path. + * Thus we avoid any further update, since we do not want to change + * CPU boosting while the task is exiting. + */ + if (p->flags & PF_EXITING) + return; + + /* Get task boost group */ + rcu_read_lock(); + st = task_schedtune(p); + idx = st->idx; + rcu_read_unlock(); + + schedtune_tasks_update(p, cpu, idx, 1); +} + +/* + * NOTE: This function must be called while holding the lock on the CPU RQ + */ +void schedtune_dequeue_task(struct task_struct *p, int cpu) +{ + struct schedtune *st; + int idx; + + /* + * When a task is marked PF_EXITING by do_exit() it's going to be + * dequeued and enqueued multiple times in the exit path. + * Thus we avoid any further update, since we do not want to change + * CPU boosting while the task is exiting. + * The last dequeue will be done by cgroup exit() callback. + */ + if (p->flags & PF_EXITING) + return; + + /* Get task boost group */ + rcu_read_lock(); + st = task_schedtune(p); + idx = st->idx; + rcu_read_unlock(); + + schedtune_tasks_update(p, cpu, idx, -1); +} + +int schedtune_cpu_boost(int cpu) +{ + struct boost_groups *bg; + + bg = &per_cpu(cpu_boost_groups, cpu); + return bg->boost_max; +} + static u64 boost_read(struct cgroup_subsys_state *css, struct cftype *cft) { diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h new file mode 100644 index 000000000000..561b5171a19b --- /dev/null +++ b/kernel/sched/tune.h @@ -0,0 +1,23 @@ + +#ifdef CONFIG_SCHED_TUNE + +#ifdef CONFIG_CGROUP_SCHEDTUNE + +int schedtune_cpu_boost(int cpu); + +void schedtune_enqueue_task(struct task_struct *p, int cpu); +void schedtune_dequeue_task(struct task_struct *p, int cpu); + +#else /* CONFIG_CGROUP_SCHEDTUNE */ + +#define schedtune_enqueue_task(task, cpu) do { } while (0) +#define schedtune_dequeue_task(task, cpu) do { } while (0) + +#endif /* CONFIG_CGROUP_SCHEDTUNE */ + +#else /* CONFIG_SCHED_TUNE */ + +#define schedtune_enqueue_task(task, cpu) do { } while (0) +#define schedtune_dequeue_task(task, cpu) do { } while (0) + +#endif /* CONFIG_SCHED_TUNE */ -- GitLab From 9b2b8da324078ed6f0ab95613ba14b3608d7c913 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 14 Jan 2016 18:31:53 +0000 Subject: [PATCH 1158/1442] ANDROID: sched/fair: add boosted task utilization The task utilization signal, which is derived from PELT signals and properly scaled to be architecture and frequency invariant, is used by EAS as an estimation of the task requirements in terms of CPU bandwidth. When the energy aware scheduler is in use, this signal affects the CPU selection. Thus, a convenient way to bias that decision, which is also little intrusive, is to boost the task utilization signal each time it is required to support them. This patch introduces the new function: boosted_task_util(task) which returns a boosted value for the utilization of the specified task. The margin added to the original utilization is: 1. computed based on the "boosting strategy" in use 2. proportional to boost value defined either by the sysctl interface, when global boosting is in use, or the "taskgroup" value, when per-task boosting is enabled. The boosted signal is used by EAS a. transparently, via its integration into the task_fits() function b. explicitly, in the energy-aware wakeup path Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 42 ++++++++++++++++++++++++++++++++++++++++-- kernel/sched/tune.c | 14 ++++++++++++++ kernel/sched/tune.h | 1 + 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 37f9ca2b349d..2eec4f1e13ff 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5506,11 +5506,13 @@ static inline int task_util(struct task_struct *p) return p->se.avg.util_avg; } +static inline unsigned long boosted_task_util(struct task_struct *task); + static inline bool __task_fits(struct task_struct *p, int cpu, int util) { unsigned long capacity = capacity_of(cpu); - util += task_util(p); + util += boosted_task_util(p); return (capacity * 1024) > (util * capacity_margin); } @@ -5591,6 +5593,27 @@ schedtune_cpu_margin(unsigned long util, int cpu) return schedtune_margin(util, boost); } +static inline unsigned long +schedtune_task_margin(struct task_struct *task) +{ + unsigned int boost; + unsigned long util; + unsigned long margin; + +#ifdef CONFIG_CGROUP_SCHEDTUNE + boost = schedtune_task_boost(task); +#else + boost = get_sysctl_sched_cfs_boost(); +#endif + if (boost == 0) + return 0; + + util = task_util(task); + margin = schedtune_margin(util, boost); + + return margin; +} + #else /* CONFIG_SCHED_TUNE */ static inline unsigned int @@ -5599,6 +5622,12 @@ schedtune_cpu_margin(unsigned long util, int cpu) return 0; } +static inline unsigned int +schedtune_task_margin(struct task_struct *task) +{ + return 0; +} + #endif /* CONFIG_SCHED_TUNE */ static inline unsigned long @@ -5610,6 +5639,15 @@ boosted_cpu_util(int cpu) return util + margin; } +static inline unsigned long +boosted_task_util(struct task_struct *task) +{ + unsigned long util = task_util(task); + unsigned long margin = schedtune_task_margin(task); + + return util + margin; +} + /* * find_idlest_group finds and returns the least busy CPU group within the * domain. @@ -6043,7 +6081,7 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target) * so prev_cpu will receive a negative bias due to the double * accounting. However, the blocked utilization may be zero. */ - int new_util = cpu_util(i) + task_util(p); + int new_util = cpu_util(i) + boosted_task_util(p); if (new_util > capacity_orig_of(i)) continue; diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 540b945a01ce..87213861bde5 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -240,6 +240,20 @@ int schedtune_cpu_boost(int cpu) return bg->boost_max; } +int schedtune_task_boost(struct task_struct *p) +{ + struct schedtune *st; + int task_boost; + + /* Get task boost value */ + rcu_read_lock(); + st = task_schedtune(p); + task_boost = st->boost; + rcu_read_unlock(); + + return task_boost; +} + static u64 boost_read(struct cgroup_subsys_state *css, struct cftype *cft) { diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h index 561b5171a19b..d756ce7b06e0 100644 --- a/kernel/sched/tune.h +++ b/kernel/sched/tune.h @@ -4,6 +4,7 @@ #ifdef CONFIG_CGROUP_SCHEDTUNE int schedtune_cpu_boost(int cpu); +int schedtune_task_boost(struct task_struct *tsk); void schedtune_enqueue_task(struct task_struct *p, int cpu); void schedtune_dequeue_task(struct task_struct *p, int cpu); -- GitLab From 632905f02f1576df2f4eb5f1c8756bea3ffaad12 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 14 Jan 2016 18:35:13 +0000 Subject: [PATCH 1159/1442] ANDROID: sched/fair: keep track of energy/capacity variations The current EAS implementation does not allow "to boost" tasks performances, for example by running them at an higher OPP (or a more capable CPU), even if that could require a "reasonable" increase in energy consumption. To defined how much reasonable is an energy increase with respect to a required boost value, it is required to define and compute a trade-off between the expected energy and performance variations. However, the current EAS implementation considers only energy variations while completely disregard the impact on performance for the selection of a certain schedule candidate. This patch extends the eenv energy environment to keep track of both energy and performance deltas which are implied by the activation of a schedule candidate. The performance variation is estimated considering the different capacities of the CPUs in which the task could be scheduled. The idea is that while running on a CPU with higher capacity (e.g. higher operating point) the task could (potentially) complete faster and thus get better performance. Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2eec4f1e13ff..52f187969878 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5161,6 +5161,16 @@ struct energy_env { int src_cpu; int dst_cpu; int energy; + struct { + int before; + int after; + int diff; + } nrg; + struct { + int before; + int after; + int delta; + } cap; }; /* @@ -5327,6 +5337,22 @@ static int sched_group_energy(struct energy_env *eenv) eenv->sg_cap = sg; cap_idx = find_new_capacity(eenv, sg->sge); + + if (sg->group_weight == 1) { + /* Remove capacity of src CPU (before task move) */ + if (eenv->util_delta == 0 && + cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) { + eenv->cap.before = sg->sge->cap_states[cap_idx].cap; + eenv->cap.delta -= eenv->cap.before; + } + /* Add capacity of dst CPU (after task move) */ + if (eenv->util_delta != 0 && + cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) { + eenv->cap.after = sg->sge->cap_states[cap_idx].cap; + eenv->cap.delta += eenv->cap.after; + } + } + idle_idx = group_idle_state(sg); group_util = group_norm_util(eenv, sg); sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) @@ -5375,6 +5401,8 @@ static int energy_diff(struct energy_env *eenv) .util_delta = 0, .src_cpu = eenv->src_cpu, .dst_cpu = eenv->dst_cpu, + .nrg = { 0, 0, 0 }, + .cap = { 0, 0, 0 }, }; if (eenv->src_cpu == eenv->dst_cpu) @@ -5396,13 +5424,21 @@ static int energy_diff(struct energy_env *eenv) return 0; /* Invalid result abort */ energy_before += eenv_before.energy; + /* Keep track of SRC cpu (before) capacity */ + eenv->cap.before = eenv_before.cap.before; + eenv->cap.delta = eenv_before.cap.delta; + if (sched_group_energy(eenv)) return 0; /* Invalid result abort */ energy_after += eenv->energy; } } while (sg = sg->next, sg != sd->groups); - return energy_after-energy_before; + eenv->nrg.before = energy_before; + eenv->nrg.after = energy_after; + eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before; + + return eenv->nrg.diff; } /* -- GitLab From 2f369bb24ee8b8bd74e4fb9ad0dcfee44fdc380f Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Tue, 12 Jan 2016 18:12:13 +0000 Subject: [PATCH 1160/1442] ANDROID: sched/tune: add support to compute normalized energy The current EAS implementation considers only energy variations, while it disregards completely the impact on performance for the selection of a certain schedule candidate. Moreover, it also makes its decision based on the "absolute" value of expected energy variations. In order to properly define a trade-off strategy between increased energy consumption and performances benefits it is required to compare energy variations with performance variations. Thus, both performance and energy metrics must be expressed in comparable units. While the performance variations are expressed in terms of capacity deltas, which are defined in the range [0..SCHED_LOAD_SCALE], the same scale is not used for energy variations. This patch introduces the function: schedtune_normalize_energy(energy_diff) which returns a normalized value in the same range of capacity variations, i.e. [0..SCHED_LOAD_SCALE]. A proper set of energy normalization constants are required to provide a fast division by a constant during the normalziation of the energy_diff. The value of these constants depends on the specific energy model and topology of a target device. Thus, this patch provides also the required support for the computation at boot time of this set of variables. Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 321 ++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/tune.h | 7 + 2 files changed, 328 insertions(+) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 87213861bde5..968dcd70eaaf 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -1,7 +1,9 @@ #include #include +#include #include #include +#include #include #include @@ -9,6 +11,84 @@ unsigned int sysctl_sched_cfs_boost __read_mostly; +/* + * System energy normalization constants + */ +static struct target_nrg { + unsigned long min_power; + unsigned long max_power; + struct reciprocal_value rdiv; +} schedtune_target_nrg; + +/* Performance Boost region (B) threshold params */ +static int perf_boost_idx; + +/* Performance Constraint region (C) threshold params */ +static int perf_constrain_idx; + +/** + * Performance-Energy (P-E) Space thresholds constants + */ +struct threshold_params { + int nrg_gain; + int cap_gain; +}; + +/* + * System specific P-E space thresholds constants + */ +static struct threshold_params +threshold_gains[] = { + { 0, 4 }, /* >= 0% */ + { 0, 4 }, /* >= 10% */ + { 1, 4 }, /* >= 20% */ + { 2, 4 }, /* >= 30% */ + { 3, 4 }, /* >= 40% */ + { 4, 3 }, /* >= 50% */ + { 4, 2 }, /* >= 60% */ + { 4, 1 }, /* >= 70% */ + { 4, 0 }, /* >= 80% */ + { 4, 0 } /* >= 90% */ +}; + +static int +__schedtune_accept_deltas(int nrg_delta, int cap_delta, + int perf_boost_idx, int perf_constrain_idx) +{ + int payoff = -INT_MAX; + + /* Performance Boost (B) region */ + if (nrg_delta > 0 && cap_delta > 0) { + /* + * Evaluate "Performance Boost" vs "Energy Increase" + * payoff criteria: + * cap_delta / nrg_delta < cap_gain / nrg_gain + * which is: + * nrg_delta * cap_gain > cap_delta * nrg_gain + */ + payoff = nrg_delta * threshold_gains[perf_boost_idx].cap_gain; + payoff -= cap_delta * threshold_gains[perf_boost_idx].nrg_gain; + return payoff; + } + + /* Performance Constraint (C) region */ + if (nrg_delta < 0 && cap_delta < 0) { + /* + * Evaluate "Performance Boost" vs "Energy Increase" + * payoff criteria: + * cap_delta / nrg_delta > cap_gain / nrg_gain + * which is: + * cap_delta * nrg_gain > nrg_delta * cap_gain + */ + payoff = cap_delta * threshold_gains[perf_constrain_idx].nrg_gain; + payoff -= nrg_delta * threshold_gains[perf_constrain_idx].cap_gain; + return payoff; + } + + /* Default: reject schedule candidate */ + return payoff; +} + #ifdef CONFIG_CGROUP_SCHEDTUNE /* @@ -26,6 +106,11 @@ struct schedtune { /* Boost value for tasks on that SchedTune CGroup */ int boost; + /* Performance Boost (B) region threshold params */ + int perf_boost_idx; + + /* Performance Constraint (C) region threshold params */ + int perf_constrain_idx; }; static inline struct schedtune *css_st(struct cgroup_subsys_state *css) @@ -55,8 +140,37 @@ static inline struct schedtune *parent_st(struct schedtune *st) static struct schedtune root_schedtune = { .boost = 0, + .perf_boost_idx = 0, + .perf_constrain_idx = 0, }; +int +schedtune_accept_deltas(int nrg_delta, int cap_delta, + struct task_struct *task) +{ + struct schedtune *ct; + int perf_boost_idx; + int perf_constrain_idx; + + /* Optimal (O) region */ + if (nrg_delta < 0 && cap_delta > 0) + return INT_MAX; + + /* Suboptimal (S) region */ + if (nrg_delta > 0 && cap_delta < 0) + return -INT_MAX; + + /* Get task specific perf Boost/Constraints indexes */ + rcu_read_lock(); + ct = task_schedtune(task); + perf_boost_idx = ct->perf_boost_idx; + perf_constrain_idx = ct->perf_constrain_idx; + rcu_read_unlock(); + + return __schedtune_accept_deltas(nrg_delta, cap_delta, + perf_boost_idx, perf_constrain_idx); +} + /* * Maximum number of boost groups to support * When per-task boosting is used we still allow only limited number of @@ -396,6 +510,24 @@ struct cgroup_subsys schedtune_cgrp_subsys = { .early_init = 1, }; +#else /* CONFIG_CGROUP_SCHEDTUNE */ + +int +schedtune_accept_deltas(int nrg_delta, int cap_delta, + struct task_struct *task) +{ + /* Optimal (O) region */ + if (nrg_delta < 0 && cap_delta > 0) + return INT_MAX; + + /* Suboptimal (S) region */ + if (nrg_delta > 0 && cap_delta < 0) + return -INT_MAX; + + return __schedtune_accept_deltas(nrg_delta, cap_delta, + perf_boost_idx, perf_constrain_idx); +} + #endif /* CONFIG_CGROUP_SCHEDTUNE */ int @@ -408,5 +540,194 @@ sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write, if (ret || !write) return ret; + /* Performance Boost (B) region threshold params */ + perf_boost_idx = sysctl_sched_cfs_boost; + perf_boost_idx /= 10; + + /* Performance Constraint (C) region threshold params */ + perf_constrain_idx = 100 - sysctl_sched_cfs_boost; + perf_constrain_idx /= 10; + + return 0; +} + +/* + * System energy normalization + * Returns the normalized value, in the range [0..SCHED_LOAD_SCALE], + * corresponding to the specified energy variation. + */ +int +schedtune_normalize_energy(int energy_diff) +{ + u32 normalized_nrg; + int max_delta; + +#ifdef CONFIG_SCHED_DEBUG + /* Check for boundaries */ + max_delta = schedtune_target_nrg.max_power; + max_delta -= schedtune_target_nrg.min_power; + WARN_ON(abs(energy_diff) >= max_delta); +#endif + + /* Do scaling using positive numbers to increase the range */ + normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff; + + /* Scale by energy magnitude */ + normalized_nrg <<= SCHED_CAPACITY_SHIFT; + + /* Normalize on max energy for target platform */ + normalized_nrg = reciprocal_divide( + normalized_nrg, schedtune_target_nrg.rdiv); + + return (energy_diff < 0) ? -normalized_nrg : normalized_nrg; +} + +#ifdef CONFIG_SCHED_DEBUG +static void +schedtune_test_nrg(unsigned long delta_pwr) +{ + unsigned long test_delta_pwr; + unsigned long test_norm_pwr; + int idx; + + /* + * Check normalization constants using some constant system + * energy values + */ + pr_info("schedtune: verify normalization constants...\n"); + for (idx = 0; idx < 6; ++idx) { + test_delta_pwr = delta_pwr >> idx; + + /* Normalize on max energy for target platform */ + test_norm_pwr = reciprocal_divide( + test_delta_pwr << SCHED_CAPACITY_SHIFT, + schedtune_target_nrg.rdiv); + + pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n", + idx, test_delta_pwr, test_norm_pwr); + } +} +#else +#define schedtune_test_nrg(delta_pwr) +#endif + +/* + * Compute the min/max power consumption of a cluster and all its CPUs + */ +static void +schedtune_add_cluster_nrg( + struct sched_domain *sd, + struct sched_group *sg, + struct target_nrg *ste) +{ + struct sched_domain *sd2; + struct sched_group *sg2; + + struct cpumask *cluster_cpus; + char str[32]; + + unsigned long min_pwr; + unsigned long max_pwr; + int cpu; + + /* Get Cluster energy using EM data for the first CPU */ + cluster_cpus = sched_group_cpus(sg); + snprintf(str, 32, "CLUSTER[%*pbl]", + cpumask_pr_args(cluster_cpus)); + + min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power; + max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power; + pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n", + str, min_pwr, max_pwr); + + /* + * Keep track of this cluster's energy in the computation of the + * overall system energy + */ + ste->min_power += min_pwr; + ste->max_power += max_pwr; + + /* Get CPU energy using EM data for each CPU in the group */ + for_each_cpu(cpu, cluster_cpus) { + /* Get a SD view for the specific CPU */ + for_each_domain(cpu, sd2) { + /* Get the CPU group */ + sg2 = sd2->groups; + min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power; + max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power; + + ste->min_power += min_pwr; + ste->max_power += max_pwr; + + snprintf(str, 32, "CPU[%d]", cpu); + pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n", + str, min_pwr, max_pwr); + + /* + * Assume we have EM data only at the CPU and + * the upper CLUSTER level + */ + BUG_ON(!cpumask_equal( + sched_group_cpus(sg), + sched_group_cpus(sd2->parent->groups) + )); + break; + } + } +} + +/* + * Initialize the constants required to compute normalized energy. + * The values of these constants depends on the EM data for the specific + * target system and topology. + * Thus, this function is expected to be called by the code + * that bind the EM to the topology information. + */ +static int +schedtune_init_late(void) +{ + struct target_nrg *ste = &schedtune_target_nrg; + unsigned long delta_pwr = 0; + struct sched_domain *sd; + struct sched_group *sg; + + pr_info("schedtune: init normalization constants...\n"); + ste->max_power = 0; + ste->min_power = 0; + + rcu_read_lock(); + + /* + * When EAS is in use, we always have a pointer to the highest SD + * which provides EM data. + */ + sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask))); + if (!sd) { + pr_info("schedtune: no energy model data\n"); + goto nodata; + } + + sg = sd->groups; + do { + schedtune_add_cluster_nrg(sd, sg, ste); + } while (sg = sg->next, sg != sd->groups); + + rcu_read_unlock(); + + pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n", + "SYSTEM", ste->min_power, ste->max_power); + + /* Compute normalization constants */ + delta_pwr = ste->max_power - ste->min_power; + ste->rdiv = reciprocal_value(delta_pwr); + pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n", + ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2); + + schedtune_test_nrg(delta_pwr); return 0; + +nodata: + rcu_read_unlock(); + return -EINVAL; } +late_initcall(schedtune_init_late); diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h index d756ce7b06e0..f7273a5d994a 100644 --- a/kernel/sched/tune.h +++ b/kernel/sched/tune.h @@ -16,9 +16,16 @@ void schedtune_dequeue_task(struct task_struct *p, int cpu); #endif /* CONFIG_CGROUP_SCHEDTUNE */ +int schedtune_normalize_energy(int energy); +int schedtune_accept_deltas(int nrg_delta, int cap_delta, + struct task_struct *task); + #else /* CONFIG_SCHED_TUNE */ #define schedtune_enqueue_task(task, cpu) do { } while (0) #define schedtune_dequeue_task(task, cpu) do { } while (0) +#define schedtune_normalize_energy(energy) energy +#define schedtune_accept_deltas(nrg_delta, cap_delta, task) nrg_delta + #endif /* CONFIG_SCHED_TUNE */ -- GitLab From 88d0605215955cd8632f5ccca0338d7d725b5159 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 15 Jan 2016 15:48:03 +0000 Subject: [PATCH 1161/1442] ANDROID: sched/fair: filter energy_diff() based on energy_payoff value Once the SchedTune support is enabled and the CPU bandwidth demand of a task is boosted, we could expect increased energy consumptions which are balanced by corresponding increases of tasks performance. However, the current implementation of the energy_diff() function accepts all and _only_ the schedule candidates which results into a reduced expected system energy, which works against the boosting strategy. This patch links the energy_diff() function with the "energy payoff" engine provided by SchedTune. The energy variation computed by the energy_diff() function is now filtered using the SchedTune support to evaluated the energy payoff for a boosted task. With that patch, the energy_diff() function is going to reported as "acceptable schedule candidate" only the schedule candidate which corresponds to a positive energy_payoff. Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 47 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 52f187969878..4a8fb9688c1b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5161,9 +5161,12 @@ struct energy_env { int src_cpu; int dst_cpu; int energy; + int payoff; + struct task_struct *task; struct { int before; int after; + int delta; int diff; } nrg; struct { @@ -5384,6 +5387,44 @@ static inline bool cpu_in_sg(struct sched_group *sg, int cpu) return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg)); } +#ifdef CONFIG_SCHED_TUNE +static int energy_diff_evaluate(struct energy_env *eenv) +{ + unsigned int boost; + int nrg_delta; + + /* Return energy diff when boost margin is 0 */ +#ifdef CONFIG_CGROUP_SCHEDTUNE + boost = schedtune_task_boost(eenv->task); +#else + boost = get_sysctl_sched_cfs_boost(); +#endif + if (boost == 0) + return eenv->nrg.diff; + + /* Compute normalized energy diff */ + nrg_delta = schedtune_normalize_energy(eenv->nrg.diff); + eenv->nrg.delta = nrg_delta; + + eenv->payoff = schedtune_accept_deltas( + eenv->nrg.delta, + eenv->cap.delta, + eenv->task); + + /* + * When SchedTune is enabled, the energy_diff() function will return + * the computed energy payoff value. Since the energy_diff() return + * value is expected to be negative by its callers, this evaluation + * function return a negative value each time the evaluation return a + * positive payoff, which is the condition for the acceptance of + * a scheduling decision + */ + return -eenv->payoff; +} +#else /* CONFIG_SCHED_TUNE */ +#define energy_diff_evaluate(eenv) eenv->nrg.diff +#endif + /* * energy_diff(): Estimate the energy impact of changing the utilization * distribution. eenv specifies the change: utilisation amount, source, and @@ -5401,7 +5442,7 @@ static int energy_diff(struct energy_env *eenv) .util_delta = 0, .src_cpu = eenv->src_cpu, .dst_cpu = eenv->dst_cpu, - .nrg = { 0, 0, 0 }, + .nrg = { 0, 0, 0, 0}, .cap = { 0, 0, 0 }, }; @@ -5437,8 +5478,9 @@ static int energy_diff(struct energy_env *eenv) eenv->nrg.before = energy_before; eenv->nrg.after = energy_after; eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before; + eenv->payoff = 0; - return eenv->nrg.diff; + return energy_diff_evaluate(eenv); } /* @@ -6138,6 +6180,7 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target) .util_delta = task_util(p), .src_cpu = task_cpu(p), .dst_cpu = target_cpu, + .task = p, }; /* Not enough spare capacity on previous cpu */ -- GitLab From 0a942008a6268f8bfaeacb133f1a06ea39bcc00e Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 9 Nov 2015 12:06:24 +0000 Subject: [PATCH 1162/1442] ANDROID: DEBUG: sched: add tracepoint for cpu/freq scale invariance Signed-off-by: Juri Lelli Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 24 ++++++++++++++++++++++++ kernel/sched/fair.c | 1 + 2 files changed, 25 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 3211890ee7d5..087aca330866 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -586,6 +586,30 @@ TRACE_EVENT(sched_wake_idle_without_ipi, TP_printk("cpu=%d", __entry->cpu) ); + +TRACE_EVENT(sched_contrib_scale_f, + + TP_PROTO(int cpu, unsigned long freq_scale_factor, + unsigned long cpu_scale_factor), + + TP_ARGS(cpu, freq_scale_factor, cpu_scale_factor), + + TP_STRUCT__entry( + __field(int, cpu) + __field(unsigned long, freq_scale_factor) + __field(unsigned long, cpu_scale_factor) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->freq_scale_factor = freq_scale_factor; + __entry->cpu_scale_factor = cpu_scale_factor; + ), + + TP_printk("cpu=%d freq_scale_factor=%lu cpu_scale_factor=%lu", + __entry->cpu, __entry->freq_scale_factor, + __entry->cpu_scale_factor) +); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4a8fb9688c1b..f428baf3d44e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2817,6 +2817,7 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, scale_freq = arch_scale_freq_capacity(NULL, cpu); scale_cpu = arch_scale_cpu_capacity(NULL, cpu); + trace_sched_contrib_scale_f(cpu, scale_freq, scale_cpu); /* delta_w is the amount already accumulated against our next period */ delta_w = sa->period_contrib; -- GitLab From a4b0c3ac77086c703236c2b3552321cabdaa881f Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 9 Nov 2015 12:07:27 +0000 Subject: [PATCH 1163/1442] ANDROID: DEBUG: sched: add tracepoint for task load/util signals Signed-off-by: Juri Lelli Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 43 ++++++++++++++++++++++++++++++++++++ kernel/sched/fair.c | 3 +++ 2 files changed, 46 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 087aca330866..daf9c3d3d026 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -610,6 +610,49 @@ TRACE_EVENT(sched_contrib_scale_f, __entry->cpu, __entry->freq_scale_factor, __entry->cpu_scale_factor) ); + +/* + * Tracepoint for accounting sched averages for tasks. + */ +TRACE_EVENT(sched_load_avg_task, + + TP_PROTO(struct task_struct *tsk, struct sched_avg *avg), + + TP_ARGS(tsk, avg), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( unsigned long, load_avg ) + __field( unsigned long, util_avg ) + __field( u64, load_sum ) + __field( u32, util_sum ) + __field( u32, period_contrib ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = task_cpu(tsk); + __entry->load_avg = avg->load_avg; + __entry->util_avg = avg->util_avg; + __entry->load_sum = avg->load_sum; + __entry->util_sum = avg->util_sum; + __entry->period_contrib = avg->period_contrib; + ), + + TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu util_avg=%lu load_sum=%llu" + " util_sum=%u period_contrib=%u", + __entry->comm, + __entry->pid, + __entry->cpu, + __entry->load_avg, + __entry->util_avg, + (u64)__entry->load_sum, + (u32)__entry->util_sum, + (u32)__entry->period_contrib) +); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f428baf3d44e..92b4cce424b0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3085,6 +3085,9 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg) update_tg_load_avg(cfs_rq, 0); + + if (entity_is_task(se)) + trace_sched_load_avg_task(task_of(se), &se->avg); } /** -- GitLab From 79478804c2f2a3a7e8bf6ec13b6b976f0cf44242 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 9 Nov 2015 12:07:48 +0000 Subject: [PATCH 1164/1442] ANDROID: DEBUG: sched: add tracepoint for CPU load/util signals Signed-off-by: Juri Lelli Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 25 +++++++++++++++++++++++++ kernel/sched/fair.c | 1 + 2 files changed, 26 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index daf9c3d3d026..ce8ac5d2c0fc 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -653,6 +653,31 @@ TRACE_EVENT(sched_load_avg_task, (u32)__entry->util_sum, (u32)__entry->period_contrib) ); + +/* + * Tracepoint for accounting sched averages for cpus. + */ +TRACE_EVENT(sched_load_avg_cpu, + + TP_PROTO(int cpu, struct cfs_rq *cfs_rq), + + TP_ARGS(cpu, cfs_rq), + + TP_STRUCT__entry( + __field( int, cpu ) + __field( unsigned long, load_avg ) + __field( unsigned long, util_avg ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->load_avg = cfs_rq->avg.load_avg; + __entry->util_avg = cfs_rq->avg.util_avg; + ), + + TP_printk("cpu=%d load_avg=%lu util_avg=%lu", + __entry->cpu, __entry->load_avg, __entry->util_avg) +); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 92b4cce424b0..71f9a5395f0a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3088,6 +3088,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) if (entity_is_task(se)) trace_sched_load_avg_task(task_of(se), &se->avg); + trace_sched_load_avg_cpu(cpu, cfs_rq); } /** -- GitLab From 2f8ed12fc99b24a7c35ba17bea99915471952430 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 30 Apr 2015 17:35:23 +0100 Subject: [PATCH 1165/1442] ANDROID: DEBUG: sched,cpufreq: add cpu_capacity change tracepoint This is useful when we want to compare cpu utilization and cpu curr capacity side by side. Signed-off-by: Juri Lelli Signed-off-by: Andres Oportus --- drivers/cpufreq/cpufreq.c | 4 ++++ include/linux/sched.h | 2 ++ include/trace/events/power.h | 7 +++++++ kernel/sched/fair.c | 11 +++++++++++ kernel/sched/sched.h | 11 ----------- 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 19ea8f545cec..6a3d07869a5b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -29,6 +29,7 @@ #include #include #include +#include #include static LIST_HEAD(cpufreq_policy_list); @@ -428,6 +429,7 @@ static void cpufreq_notify_post_transition(struct cpufreq_policy *policy, void cpufreq_freq_transition_begin(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) { + int cpu; /* * Catch double invocations of _begin() which lead to self-deadlock. @@ -456,6 +458,8 @@ void cpufreq_freq_transition_begin(struct cpufreq_policy *policy, spin_unlock(&policy->transition_lock); scale_freq_capacity(policy, freqs); + for_each_cpu(cpu, policy->cpus) + trace_cpu_capacity(capacity_curr_of(cpu), cpu); cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 074a7c0667eb..d1ab918c3046 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1092,6 +1092,8 @@ struct sched_group_energy { struct capacity_state *cap_states; /* ptr to capacity state array */ }; +unsigned long capacity_curr_of(int cpu); + struct sched_group; struct sched_domain_shared { diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 070be71b1aac..ec6f81561558 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -172,6 +172,13 @@ TRACE_EVENT(cpu_frequency_limits, (unsigned long)__entry->cpu_id) ); +DEFINE_EVENT(cpu, cpu_capacity, + + TP_PROTO(unsigned int capacity, unsigned int cpu_id), + + TP_ARGS(capacity, cpu_id) +); + TRACE_EVENT(device_pm_callback_start, TP_PROTO(struct device *dev, const char *pm_ops, int event), diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 71f9a5395f0a..766c693abde8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5153,6 +5153,17 @@ static void record_wakee(struct task_struct *p) } } +/* + * Returns the current capacity of cpu after applying both + * cpu and freq scaling. + */ +unsigned long capacity_curr_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity_orig * + arch_scale_freq_capacity(NULL, cpu) + >> SCHED_CAPACITY_SHIFT; +} + static inline bool energy_aware(void) { return sched_feat(ENERGY_AWARE); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7de37eb85b3c..1f5fb36d5aee 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1575,17 +1575,6 @@ static inline unsigned long cpu_util(int cpu) return __cpu_util(cpu, 0); } -/* - * Returns the current capacity of cpu after applying both - * cpu and freq scaling. - */ -static inline unsigned long capacity_curr_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity_orig * - arch_scale_freq_capacity(NULL, cpu) - >> SCHED_CAPACITY_SHIFT; -} - #endif #ifdef CONFIG_CPU_FREQ_GOV_SCHED -- GitLab From 050dcb86c040b3b8d077535137251b1f2ecca93e Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Mon, 22 Jun 2015 13:49:07 +0100 Subject: [PATCH 1166/1442] ANDROID: DEBUG: schedtune: add tracepoint for SchedTune configuration update Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 21 +++++++++++++++++++++ kernel/sched/tune.c | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index ce8ac5d2c0fc..0af5007421e4 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -678,6 +678,27 @@ TRACE_EVENT(sched_load_avg_cpu, TP_printk("cpu=%d load_avg=%lu util_avg=%lu", __entry->cpu, __entry->load_avg, __entry->util_avg) ); + +/* + * Tracepoint for sched_tune_config settings + */ +TRACE_EVENT(sched_tune_config, + + TP_PROTO(int boost), + + TP_ARGS(boost), + + TP_STRUCT__entry( + __field( int, boost ) + ), + + TP_fast_assign( + __entry->boost = boost; + ), + + TP_printk("boost=%d ", __entry->boost) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 968dcd70eaaf..8b9ca8610a36 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -7,6 +7,8 @@ #include #include +#include + #include "sched.h" unsigned int sysctl_sched_cfs_boost __read_mostly; @@ -392,6 +394,8 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, /* Update CPU boost */ schedtune_boostgroup_update(st->idx, st->boost); + trace_sched_tune_config(st->boost); + return 0; } -- GitLab From cccead1ef9405fe626740014194173ef014e8518 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Mon, 22 Jun 2015 13:51:07 +0100 Subject: [PATCH 1167/1442] ANDROID: DEBUG: schedtune: add tracepoint for CPU boost signal Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 27 +++++++++++++++++++++++++++ kernel/sched/fair.c | 2 ++ 2 files changed, 29 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 0af5007421e4..6afab79d71c6 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -699,6 +699,33 @@ TRACE_EVENT(sched_tune_config, TP_printk("boost=%d ", __entry->boost) ); +/* + * Tracepoint for accounting CPU boosted utilization + */ +TRACE_EVENT(sched_boost_cpu, + + TP_PROTO(int cpu, unsigned long util, unsigned long margin), + + TP_ARGS(cpu, util, margin), + + TP_STRUCT__entry( + __field( int, cpu ) + __field( unsigned long, util ) + __field( unsigned long, margin ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->util = util; + __entry->margin = margin; + ), + + TP_printk("cpu=%d util=%lu margin=%lu", + __entry->cpu, + __entry->util, + __entry->margin) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 766c693abde8..235a1a4e6498 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5730,6 +5730,8 @@ boosted_cpu_util(int cpu) unsigned long util = cpu_util(cpu); unsigned long margin = schedtune_cpu_margin(util, cpu); + trace_sched_boost_cpu(cpu, util, margin); + return util + margin; } -- GitLab From 953b10450e0ff5b4aba1c10f1b90c73b1d452b1c Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Wed, 24 Jun 2015 15:36:08 +0100 Subject: [PATCH 1168/1442] ANDROID: DEBUG: schedtune: add tracepoint for schedtune_tasks_update() values Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 62 ++++++++++++++++++++++++++++++++++++ kernel/sched/tune.c | 12 ++++++- 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 6afab79d71c6..4913e2c0cfdf 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -726,6 +726,68 @@ TRACE_EVENT(sched_boost_cpu, __entry->margin) ); +/* + * Tracepoint for schedtune_tasks_update + */ +TRACE_EVENT(sched_tune_tasks_update, + + TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx, + unsigned int boost, unsigned int max_boost), + + TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, cpu ) + __field( int, tasks ) + __field( int, idx ) + __field( unsigned int, boost ) + __field( unsigned int, max_boost ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->cpu = cpu; + __entry->tasks = tasks; + __entry->idx = idx; + __entry->boost = boost; + __entry->max_boost = max_boost; + ), + + TP_printk("pid=%d comm=%s " + "cpu=%d tasks=%d idx=%d boost=%u max_boost=%u", + __entry->pid, __entry->comm, + __entry->cpu, __entry->tasks, __entry->idx, + __entry->boost, __entry->max_boost) +); + +/* + * Tracepoint for schedtune_boostgroup_update + */ +TRACE_EVENT(sched_tune_boostgroup_update, + + TP_PROTO(int cpu, int variation, int max_boost), + + TP_ARGS(cpu, variation, max_boost), + + TP_STRUCT__entry( + __field( int, cpu ) + __field( int, variation ) + __field( int, max_boost ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->variation = variation; + __entry->max_boost = max_boost; + ), + + TP_printk("cpu=%d variation=%d max_boost=%d", + __entry->cpu, __entry->variation, __entry->max_boost) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 8b9ca8610a36..db54cc86a942 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -264,12 +264,18 @@ schedtune_boostgroup_update(int idx, int boost) /* Check if this update increase current max */ if (boost > cur_boost_max && bg->group[idx].tasks) { bg->boost_max = boost; + trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max); continue; } /* Check if this update has decreased current max */ - if (cur_boost_max == old_boost && old_boost > boost) + if (cur_boost_max == old_boost && old_boost > boost) { schedtune_cpu_update(cpu); + trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max); + continue; + } + + trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max); } return 0; @@ -293,6 +299,10 @@ schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count) tasks = bg->group[idx].tasks; if (tasks == 1 || tasks == 0) schedtune_cpu_update(cpu); + + trace_sched_tune_tasks_update(p, cpu, tasks, idx, + bg->group[idx].boost, bg->boost_max); + } /* -- GitLab From 77501866f8df6145774d07410adbcc858344152c Mon Sep 17 00:00:00 2001 From: Joseph Lo Date: Mon, 22 Apr 2013 14:39:18 +0800 Subject: [PATCH 1169/1442] CHROMIUM: sched: update the average of nr_running Doing a Exponential moving average per nr_running++/-- does not guarantee a fixed sample rate which induces errors if there are lots of threads being enqueued/dequeued from the rq (Linpack mt). Instead of keeping track of the avg, the scheduler now keeps track of the integral of nr_running and allows the readers to perform filtering on top. Original-author: Sai Charan Gurrappadi Change-Id: Id946654f32fa8be0eaf9d8fa7c9a8039b5ef9fab Signed-off-by: Joseph Lo Signed-off-by: Andrew Bresticker Reviewed-on: https://chromium-review.googlesource.com/174694 Reviewed-on: https://chromium-review.googlesource.com/272853 [jstultz: fwdported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- include/linux/sched.h | 3 +++ kernel/sched/core.c | 30 ++++++++++++++++++++++++++ kernel/sched/sched.h | 49 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d1ab918c3046..3d779262b0c4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -173,6 +173,9 @@ extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); +#ifdef CONFIG_CPU_QUIET +extern u64 nr_running_integral(unsigned int cpu); +#endif extern void calc_global_load(unsigned long ticks); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8e60c55be0c8..4d0c64608454 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2964,6 +2964,36 @@ unsigned long nr_iowait_cpu(int cpu) return atomic_read(&this->nr_iowait); } +#ifdef CONFIG_CPU_QUIET +u64 nr_running_integral(unsigned int cpu) +{ + unsigned int seqcnt; + u64 integral; + struct rq *q; + + if (cpu >= nr_cpu_ids) + return 0; + + q = cpu_rq(cpu); + + /* + * Update average to avoid reading stalled value if there were + * no run-queue changes for a long time. On the other hand if + * the changes are happening right now, just read current value + * directly. + */ + + seqcnt = read_seqcount_begin(&q->ave_seqcnt); + integral = do_nr_running_integral(q); + if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) { + read_seqcount_begin(&q->ave_seqcnt); + integral = q->nr_running_integral; + } + + return integral; +} +#endif + void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) { struct rq *rq = this_rq(); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1f5fb36d5aee..77158fea1987 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -622,6 +622,14 @@ struct rq { #ifdef CONFIG_NO_HZ_FULL unsigned long last_sched_tick; #endif + +#ifdef CONFIG_CPU_QUIET + /* time-based average load */ + u64 nr_last_stamp; + u64 nr_running_integral; + seqcount_t ave_seqcnt; +#endif + /* capture load from *all* tasks on this cpu: */ struct load_weight load; unsigned long nr_load_updates; @@ -1426,7 +1434,7 @@ static inline void sched_update_tick_dependency(struct rq *rq) { } extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc); -static inline void add_nr_running(struct rq *rq, unsigned count) +static inline void __add_nr_running(struct rq *rq, unsigned count) { unsigned prev_nr = rq->nr_running; @@ -1442,13 +1450,50 @@ static inline void add_nr_running(struct rq *rq, unsigned count) sched_update_tick_dependency(rq); } -static inline void sub_nr_running(struct rq *rq, unsigned count) +static inline void __sub_nr_running(struct rq *rq, unsigned count) { rq->nr_running -= count; /* Check if we still need preemption */ sched_update_tick_dependency(rq); } +#ifdef CONFIG_CPU_QUIET +#define NR_AVE_SCALE(x) ((x) << FSHIFT) +static inline u64 do_nr_running_integral(struct rq *rq) +{ + s64 nr, deltax; + u64 nr_running_integral = rq->nr_running_integral; + + deltax = rq->clock_task - rq->nr_last_stamp; + nr = NR_AVE_SCALE(rq->nr_running); + + nr_running_integral += nr * deltax; + + return nr_running_integral; +} + +static inline void add_nr_running(struct rq *rq, unsigned count) +{ + write_seqcount_begin(&rq->ave_seqcnt); + rq->nr_running_integral = do_nr_running_integral(rq); + rq->nr_last_stamp = rq->clock_task; + __add_nr_running(rq, count); + write_seqcount_end(&rq->ave_seqcnt); +} + +static inline void sub_nr_running(struct rq *rq, unsigned count) +{ + write_seqcount_begin(&rq->ave_seqcnt); + rq->nr_running_integral = do_nr_running_integral(rq); + rq->nr_last_stamp = rq->clock_task; + __sub_nr_running(rq, count); + write_seqcount_end(&rq->ave_seqcnt); +} +#else +#define add_nr_running __add_nr_running +#define sub_nr_running __sub_nr_running +#endif + static inline void rq_last_tick_reset(struct rq *rq) { #ifdef CONFIG_NO_HZ_FULL -- GitLab From ecccdb7343dbb4126ae044b14a86a0dad48c60df Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 14 Jan 2016 18:43:37 +0000 Subject: [PATCH 1170/1442] ANDROID: DEBUG: sched/tune: add tracepoint for task boost signal Change-Id: I545d3bf5569fc41c0fa70f51dff9a19c11d532ee Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 30 ++++++++++++++++++++++++++++++ kernel/sched/fair.c | 2 ++ 2 files changed, 32 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4913e2c0cfdf..874204540236 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -788,6 +788,36 @@ TRACE_EVENT(sched_tune_boostgroup_update, __entry->cpu, __entry->variation, __entry->max_boost) ); +/* + * Tracepoint for accounting task boosted utilization + */ +TRACE_EVENT(sched_boost_task, + + TP_PROTO(struct task_struct *tsk, unsigned long util, unsigned long margin), + + TP_ARGS(tsk, util, margin), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( unsigned long, util ) + __field( unsigned long, margin ) + + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->util = util; + __entry->margin = margin; + ), + + TP_printk("comm=%s pid=%d util=%lu margin=%lu", + __entry->comm, __entry->pid, + __entry->util, + __entry->margin) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 235a1a4e6498..6ae74cc7728c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5741,6 +5741,8 @@ boosted_task_util(struct task_struct *task) unsigned long util = task_util(task); unsigned long margin = schedtune_task_margin(task); + trace_sched_boost_task(task, util, margin); + return util + margin; } -- GitLab From 110441baf62614f65fb0f20754b8dfca1486d6df Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 14 Jan 2016 18:47:21 +0000 Subject: [PATCH 1171/1442] ANDROID: DEBUG: sched/tune: add tracepoint for energy_diff() values Change-Id: Id8fafbd85f6d81248f322e073ee790a7ceec0bf7 Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 57 ++++++++++++++++++++++++++++++++++++ kernel/sched/fair.c | 11 ++++++- 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 874204540236..8884ec604af8 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -818,6 +818,63 @@ TRACE_EVENT(sched_boost_task, __entry->margin) ); +/* + * Tracepoint for accounting sched group energy + */ +TRACE_EVENT(sched_energy_diff, + + TP_PROTO(struct task_struct *tsk, int scpu, int dcpu, int udelta, + int nrgb, int nrga, int nrgd, int capb, int capa, int capd, + int nrgn, int nrgp), + + TP_ARGS(tsk, scpu, dcpu, udelta, + nrgb, nrga, nrgd, capb, capa, capd, + nrgn, nrgp), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, scpu ) + __field( int, dcpu ) + __field( int, udelta ) + __field( int, nrgb ) + __field( int, nrga ) + __field( int, nrgd ) + __field( int, capb ) + __field( int, capa ) + __field( int, capd ) + __field( int, nrgn ) + __field( int, nrgp ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->scpu = scpu; + __entry->dcpu = dcpu; + __entry->udelta = udelta; + __entry->nrgb = nrgb; + __entry->nrga = nrga; + __entry->nrgd = nrgd; + __entry->capb = capb; + __entry->capa = capa; + __entry->capd = capd; + __entry->nrgn = nrgn; + __entry->nrgp = nrgp; + ), + + TP_printk("pid=%d comm=%s " + "src_cpu=%d dst_cpu=%d usage_delta=%d " + "nrg_before=%d nrg_after=%d nrg_diff=%d " + "cap_before=%d cap_after=%d cap_delta=%d " + "nrg_delta=%d nrg_payoff=%d", + __entry->pid, __entry->comm, + __entry->scpu, __entry->dcpu, __entry->udelta, + __entry->nrgb, __entry->nrga, __entry->nrgd, + __entry->capb, __entry->capa, __entry->capd, + __entry->nrgn, __entry->nrgp) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6ae74cc7728c..deff3ca73808 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5453,6 +5453,7 @@ static int energy_diff(struct energy_env *eenv) struct sched_domain *sd; struct sched_group *sg; int sd_cpu = -1, energy_before = 0, energy_after = 0; + int result; struct energy_env eenv_before = { .util_delta = 0, @@ -5496,7 +5497,15 @@ static int energy_diff(struct energy_env *eenv) eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before; eenv->payoff = 0; - return energy_diff_evaluate(eenv); + result = energy_diff_evaluate(eenv); + + trace_sched_energy_diff(eenv->task, + eenv->src_cpu, eenv->dst_cpu, eenv->util_delta, + eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff, + eenv->cap.before, eenv->cap.after, eenv->cap.delta, + eenv->nrg.delta, eenv->payoff); + + return result; } /* -- GitLab From 5824d986202b502f2e93b6710b332b2691db38c5 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Wed, 20 Jan 2016 14:06:05 +0000 Subject: [PATCH 1172/1442] ANDROID: DEBUG: sched/tune: add tracepoint on P-E space filtering Change-Id: I31dfed67c0486713b88efb75df767329f2802e06 Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 35 +++++++++++++++++++++++++++++++++++ kernel/sched/tune.c | 30 ++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8884ec604af8..1e5ee70bb9b4 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -875,6 +875,41 @@ TRACE_EVENT(sched_energy_diff, __entry->nrgn, __entry->nrgp) ); +/* + * Tracepoint for schedtune_tasks_update + */ +TRACE_EVENT(sched_tune_filter, + + TP_PROTO(int nrg_delta, int cap_delta, + int nrg_gain, int cap_gain, + int payoff, int region), + + TP_ARGS(nrg_delta, cap_delta, nrg_gain, cap_gain, payoff, region), + + TP_STRUCT__entry( + __field( int, nrg_delta ) + __field( int, cap_delta ) + __field( int, nrg_gain ) + __field( int, cap_gain ) + __field( int, payoff ) + __field( int, region ) + ), + + TP_fast_assign( + __entry->nrg_delta = nrg_delta; + __entry->cap_delta = cap_delta; + __entry->nrg_gain = nrg_gain; + __entry->cap_gain = cap_gain; + __entry->payoff = payoff; + __entry->region = region; + ), + + TP_printk("nrg_delta=%d cap_delta=%d nrg_gain=%d cap_gain=%d payoff=%d region=%d", + __entry->nrg_delta, __entry->cap_delta, + __entry->nrg_gain, __entry->cap_gain, + __entry->payoff, __entry->region) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index db54cc86a942..4b385845c958 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -70,6 +70,13 @@ __schedtune_accept_deltas(int nrg_delta, int cap_delta, */ payoff = nrg_delta * threshold_gains[perf_boost_idx].cap_gain; payoff -= cap_delta * threshold_gains[perf_boost_idx].nrg_gain; + + trace_sched_tune_filter( + nrg_delta, cap_delta, + threshold_gains[perf_boost_idx].nrg_gain, + threshold_gains[perf_boost_idx].cap_gain, + payoff, 8); + return payoff; } @@ -84,6 +91,13 @@ __schedtune_accept_deltas(int nrg_delta, int cap_delta, */ payoff = cap_delta * threshold_gains[perf_constrain_idx].nrg_gain; payoff -= nrg_delta * threshold_gains[perf_constrain_idx].cap_gain; + + trace_sched_tune_filter( + nrg_delta, cap_delta, + threshold_gains[perf_constrain_idx].nrg_gain, + threshold_gains[perf_constrain_idx].cap_gain, + payoff, 6); + return payoff; } @@ -155,12 +169,16 @@ schedtune_accept_deltas(int nrg_delta, int cap_delta, int perf_constrain_idx; /* Optimal (O) region */ - if (nrg_delta < 0 && cap_delta > 0) + if (nrg_delta < 0 && cap_delta > 0) { + trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0); return INT_MAX; + } /* Suboptimal (S) region */ - if (nrg_delta > 0 && cap_delta < 0) + if (nrg_delta > 0 && cap_delta < 0) { + trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5); return -INT_MAX; + } /* Get task specific perf Boost/Constraints indexes */ rcu_read_lock(); @@ -531,12 +549,16 @@ schedtune_accept_deltas(int nrg_delta, int cap_delta, struct task_struct *task) { /* Optimal (O) region */ - if (nrg_delta < 0 && cap_delta > 0) + if (nrg_delta < 0 && cap_delta > 0) { + trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0); return INT_MAX; + } /* Suboptimal (S) region */ - if (nrg_delta > 0 && cap_delta < 0) + if (nrg_delta > 0 && cap_delta < 0) { + trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5); return -INT_MAX; + } return __schedtune_accept_deltas(nrg_delta, cap_delta, perf_boost_idx, perf_constrain_idx); -- GitLab From 2178e84423daa966cd8ebd54fa399fbfd648be37 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 22 Jul 2016 11:35:59 +0100 Subject: [PATCH 1173/1442] ANDROID: FIXUP: sched: fix build for non-SMP target Currently the build for a single-core (e.g. user-mode) Linux is broken and this configuration is required (at least) to run some network tests. The main issues for the current code support on single-core systems are: 1. {se,rq}::sched_avg is not available nor maintained for !SMP systems This means that load and utilisation signals are NOT available in single core systems. All the EAS code depends on these signals. 2. sched_group_energy is also SMP dependant. Again this means that all the EAS setup and preparation code (energyn model initialization) has to be properly guarded/disabled for !SMP systems. 3. SchedFreq depends on utilization signal, which is not available on !SMP systems. 4. SchedTune is useless on unicore systems if SchedFreq is not available. 5. WALT machinery is not required on single-core systems. This patch addresses all these issues by enforcing some constraints for single-core systems: a) WALT, SchedTune and SchedTune are now dependant on SMP b) The default governor for !SMP systems is INTERACTIVE c) The energy model initialisation/build functions are d) Other minor code re-arrangements and CONFIG_SMP guarding to enable single core builds. Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- drivers/cpufreq/Kconfig | 1 + include/linux/sched_energy.h | 8 ++++++++ include/trace/events/sched.h | 4 ++++ init/Kconfig | 1 + kernel/sched/Makefile | 4 ++-- kernel/sched/fair.c | 33 +++++++++++++++++++++++++++++---- kernel/sched/sched.h | 3 +-- 7 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 964853056137..f98a90125aa5 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -194,6 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE config CPU_FREQ_GOV_SCHED bool "'sched' cpufreq governor" depends on CPU_FREQ + depends on SMP select CPU_FREQ_GOV_COMMON help 'sched' - this governor scales cpu frequency from the diff --git a/include/linux/sched_energy.h b/include/linux/sched_energy.h index a3f1627ac609..1daf3e1f98a7 100644 --- a/include/linux/sched_energy.h +++ b/include/linux/sched_energy.h @@ -29,8 +29,16 @@ #define for_each_possible_sd_level(level) \ for (level = 0; level < NR_SD_LEVELS; level++) +#ifdef CONFIG_SMP + extern struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS]; void init_sched_energy_costs(void); +#else + +#define init_sched_energy_costs() do { } while (0) + +#endif /* CONFIG_SMP */ + #endif diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 1e5ee70bb9b4..a8864dc58405 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -611,6 +611,8 @@ TRACE_EVENT(sched_contrib_scale_f, __entry->cpu_scale_factor) ); +#ifdef CONFIG_SMP + /* * Tracepoint for accounting sched averages for tasks. */ @@ -910,6 +912,8 @@ TRACE_EVENT(sched_tune_filter, __entry->payoff, __entry->region) ); +#endif /* CONFIG_SMP */ + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/init/Kconfig b/init/Kconfig index e8c2989848b0..b0bfddeb39a4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1326,6 +1326,7 @@ config SCHED_AUTOGROUP config SCHED_TUNE bool "Boosting for CFS tasks (EXPERIMENTAL)" + depends on SMP help This option enables the system-wide support for task boosting. When this support is enabled a new sysctl interface is exposed to diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 86fd8b61ffd1..87be48374e01 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -16,9 +16,9 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer endif obj-y += core.o loadavg.o clock.o cputime.o -obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o energy.o +obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o obj-y += wait.o swait.o completion.o idle.o -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index deff3ca73808..34b6bf939335 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4534,10 +4534,16 @@ static inline void hrtick_update(struct rq *rq) } #endif +#ifdef CONFIG_SMP +static bool cpu_overutilized(int cpu); static unsigned long capacity_orig_of(int cpu); static unsigned long cpu_util(int cpu); static inline unsigned long boosted_cpu_util(int cpu); +#else +#define boosted_cpu_util(cpu) cpu_util(cpu) +#endif +#ifdef CONFIG_SMP static void update_capacity_of(int cpu) { unsigned long req_cap; @@ -4550,8 +4556,7 @@ static void update_capacity_of(int cpu) req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); set_cfs_cpu_capacity(cpu, true, req_cap); } - -static bool cpu_overutilized(int cpu); +#endif /* * The enqueue_task method is called before nr_running is @@ -4563,8 +4568,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; +#ifdef CONFIG_SMP int task_new = flags & ENQUEUE_WAKEUP_NEW; int task_wakeup = flags & ENQUEUE_WAKEUP; +#endif /* * If in_iowait is set, the code below may not trigger any cpufreq @@ -4604,8 +4611,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) update_cfs_shares(cfs_rq); } - if (!se) { + if (!se) add_nr_running(rq, 1); + +#ifdef CONFIG_SMP + + if (!se) { if (!task_new && !rq->rd->overutilized && cpu_overutilized(rq->cpu)) rq->rd->overutilized = true; @@ -4622,6 +4633,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (task_new || task_wakeup) update_capacity_of(cpu_of(rq)); } +#endif /* CONFIG_SMP */ + hrtick_update(rq); } @@ -4678,8 +4691,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) update_cfs_shares(cfs_rq); } - if (!se) { + if (!se) sub_nr_running(rq, 1); + +#ifdef CONFIG_SMP + + if (!se) { schedtune_dequeue_task(p, cpu_of(rq)); /* @@ -4697,6 +4714,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) set_cfs_cpu_capacity(cpu_of(rq), false, 0); } } + +#endif /* CONFIG_SMP */ + hrtick_update(rq); } @@ -6376,6 +6396,8 @@ static void task_dead_fair(struct task_struct *p) { remove_entity_load_avg(&p->se); } +#else +#define task_fits_max(p, cpu) true #endif /* CONFIG_SMP */ static unsigned long @@ -9313,10 +9335,13 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) if (static_branch_unlikely(&sched_numa_balancing)) task_tick_numa(rq, curr); +#ifdef CONFIG_SMP if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) rq->rd->overutilized = true; rq->misfit_task = !task_fits_max(curr, rq->cpu); +#endif + } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 77158fea1987..ba2efc714e43 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1328,6 +1328,7 @@ extern const struct sched_class idle_sched_class; #ifdef CONFIG_SMP +extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc); extern void update_group_capacity(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); @@ -1432,8 +1433,6 @@ static inline void sched_update_tick_dependency(struct rq *rq) static inline void sched_update_tick_dependency(struct rq *rq) { } #endif -extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc); - static inline void __add_nr_running(struct rq *rq, unsigned count) { unsigned prev_nr = rq->nr_running; -- GitLab From 98d73e419c146d79532eb46369e2b393d1a1d0f2 Mon Sep 17 00:00:00 2001 From: Srinath Sridharan Date: Mon, 1 Aug 2016 11:34:05 +0100 Subject: [PATCH 1174/1442] ANDROID: sched/cpufreq_sched: Consolidated update Contains: sched/cpufreq_sched: use shorter throttle for raising OPP Avoid cases where a brief drop in load causes a change to a low OPP for the full throttle period. Use a shorter throttle period for raising OPP than for lowering OPP. sched-freq: Fix handling of max/min frequency This reverts commit 9726142608f5b3bf5df4280243c9d324e692a510. Change-Id: Ia78095354f7ad9492f00deb509a2b45112361eda sched/cpufreq: Increasing throttle_down_nsec to 50ms Change-Id: I2d8969cf2a64fa719b9dd86f43f9dd14b1ff84fe sched-freq: make throttle times tunable Change-Id: I127879645367425b273441d7f0306bb15d5633cb Signed-off-by: Srinath Sridharan Signed-off-by: Todd Kjos Signed-off-by: Juri Lelli [jstultz: Fwdported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- kernel/sched/cpufreq_sched.c | 175 +++++++++++++++++++++++++++++++---- 1 file changed, 159 insertions(+), 16 deletions(-) diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index 2e74f9732a36..83ffa79375fb 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -19,7 +19,8 @@ #include "sched.h" -#define THROTTLE_NSEC 50000000 /* 50ms default */ +#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */ +#define THROTTLE_UP_NSEC 500000 /* 500us default */ struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE; static bool __read_mostly cpufreq_driver_slow; @@ -33,8 +34,10 @@ DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); /** * gov_data - per-policy data internal to the governor - * @throttle: next throttling period expiry. Derived from throttle_nsec - * @throttle_nsec: throttle period length in nanoseconds + * @up_throttle: next throttling period expiry if increasing OPP + * @down_throttle: next throttling period expiry if decreasing OPP + * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP + * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP * @task: worker thread for dvfs transition that may block/sleep * @irq_work: callback used to wake up worker thread * @requested_freq: last frequency requested by the sched governor @@ -48,11 +51,14 @@ DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); * call down_write(policy->rwsem). */ struct gov_data { - ktime_t throttle; - unsigned int throttle_nsec; + ktime_t up_throttle; + ktime_t down_throttle; + unsigned int up_throttle_nsec; + unsigned int down_throttle_nsec; struct task_struct *task; struct irq_work irq_work; unsigned int requested_freq; + int max; }; static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, @@ -66,25 +72,29 @@ static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); - gd->throttle = ktime_add_ns(ktime_get(), gd->throttle_nsec); + gd->up_throttle = ktime_add_ns(ktime_get(), gd->up_throttle_nsec); + gd->down_throttle = ktime_add_ns(ktime_get(), gd->down_throttle_nsec); up_write(&policy->rwsem); } -static bool finish_last_request(struct gov_data *gd) +static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq) { ktime_t now = ktime_get(); - if (ktime_after(now, gd->throttle)) + ktime_t throttle = gd->requested_freq < cur_freq ? + gd->down_throttle : gd->up_throttle; + + if (ktime_after(now, throttle)) return false; while (1) { - int usec_left = ktime_to_ns(ktime_sub(gd->throttle, now)); + int usec_left = ktime_to_ns(ktime_sub(throttle, now)); usec_left /= NSEC_PER_USEC; trace_cpufreq_sched_throttled(usec_left); usleep_range(usec_left, usec_left + 100); now = ktime_get(); - if (ktime_after(now, gd->throttle)) + if (ktime_after(now, throttle)) return true; } } @@ -128,7 +138,7 @@ static int cpufreq_sched_thread(void *data) * if the frequency thread sleeps while waiting to be * unthrottled, start over to check for a newer request */ - if (finish_last_request(gd)) + if (finish_last_request(gd, policy->cur)) continue; last_request = new_request; cpufreq_sched_try_driver_target(policy, new_request); @@ -183,13 +193,18 @@ static void update_fdomain_capacity_request(int cpu) } /* Convert the new maximum capacity request into a cpu frequency */ - freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT; + freq_new = capacity * gd->max >> SCHED_CAPACITY_SHIFT; index_new = cpufreq_frequency_table_target(policy, freq_new, CPUFREQ_RELATION_L); freq_new = policy->freq_table[index_new].frequency; + if (freq_new > policy->max) + freq_new = policy->max; + + if (freq_new < policy->min) + freq_new = policy->min; + trace_cpufreq_sched_request_opp(cpu, capacity, freq_new, gd->requested_freq); - if (freq_new == gd->requested_freq) goto out; @@ -243,10 +258,17 @@ static inline void clear_sched_freq(void) static_key_slow_dec(&__sched_freq); } +static struct attribute_group sched_attr_group_gov_pol; +static struct attribute_group *get_sysfs_attr(void) +{ + return &sched_attr_group_gov_pol; +} + static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) { struct gov_data *gd; int cpu; + int rc; for_each_cpu(cpu, policy->cpus) memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0, @@ -256,11 +278,20 @@ static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) if (!gd) return -ENOMEM; - gd->throttle_nsec = policy->cpuinfo.transition_latency ? + gd->up_throttle_nsec = policy->cpuinfo.transition_latency ? policy->cpuinfo.transition_latency : - THROTTLE_NSEC; + THROTTLE_UP_NSEC; + gd->down_throttle_nsec = THROTTLE_DOWN_NSEC; pr_debug("%s: throttle threshold = %u [ns]\n", - __func__, gd->throttle_nsec); + __func__, gd->up_throttle_nsec); + + gd->max = policy->max; + + rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr()); + if (rc) { + pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc); + goto err; + } if (cpufreq_driver_is_slow()) { cpufreq_driver_slow = true; @@ -298,6 +329,8 @@ static void cpufreq_sched_policy_exit(struct cpufreq_policy *policy) put_task_struct(gd->task); } + sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr()); + policy->governor_data = NULL; kfree(gd); @@ -313,6 +346,32 @@ static int cpufreq_sched_start(struct cpufreq_policy *policy) return 0; } +static void cpufreq_sched_limits(struct cpufreq_policy *policy) +{ + struct gov_data *gd; + + pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n", + policy->cpu, policy->min, policy->max, + policy->cur); + + if (!down_write_trylock(&policy->rwsem)) + return; + /* + * Need to keep track of highest max frequency for + * capacity calculations + */ + gd = policy->governor_data; + if (gd->max < policy->max) + gd->max = policy->max; + + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); + + up_write(&policy->rwsem); +} + static void cpufreq_sched_stop(struct cpufreq_policy *policy) { int cpu; @@ -321,6 +380,89 @@ static void cpufreq_sched_stop(struct cpufreq_policy *policy) per_cpu(enabled, cpu) = 0; } +/* Tunables */ +static ssize_t show_up_throttle_nsec(struct gov_data *gd, char *buf) +{ + return sprintf(buf, "%u\n", gd->up_throttle_nsec); +} + +static ssize_t store_up_throttle_nsec(struct gov_data *gd, + const char *buf, size_t count) +{ + int ret; + long unsigned int val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + gd->up_throttle_nsec = val; + return count; +} + +static ssize_t show_down_throttle_nsec(struct gov_data *gd, char *buf) +{ + return sprintf(buf, "%u\n", gd->down_throttle_nsec); +} + +static ssize_t store_down_throttle_nsec(struct gov_data *gd, + const char *buf, size_t count) +{ + int ret; + long unsigned int val; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + gd->down_throttle_nsec = val; + return count; +} + +/* + * Create show/store routines + * - sys: One governor instance for complete SYSTEM + * - pol: One governor instance per struct cpufreq_policy + */ +#define show_gov_pol_sys(file_name) \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + return show_##file_name(policy->governor_data, buf); \ +} + +#define store_gov_pol_sys(file_name) \ +static ssize_t store_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + return store_##file_name(policy->governor_data, buf, count); \ +} + +#define gov_pol_attr_rw(_name) \ + static struct freq_attr _name##_gov_pol = \ + __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) + +#define show_store_gov_pol_sys(file_name) \ + show_gov_pol_sys(file_name); \ + store_gov_pol_sys(file_name) +#define tunable_handlers(file_name) \ + show_gov_pol_sys(file_name); \ + store_gov_pol_sys(file_name); \ + gov_pol_attr_rw(file_name) + +tunable_handlers(down_throttle_nsec); +tunable_handlers(up_throttle_nsec); + +/* Per policy governor instance */ +static struct attribute *sched_attributes_gov_pol[] = { + &up_throttle_nsec_gov_pol.attr, + &down_throttle_nsec_gov_pol.attr, + NULL, +}; + +static struct attribute_group sched_attr_group_gov_pol = { + .attrs = sched_attributes_gov_pol, + .name = "sched", +}; + #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED static #endif @@ -330,6 +472,7 @@ struct cpufreq_governor cpufreq_gov_sched = { .exit = cpufreq_sched_policy_exit, .start = cpufreq_sched_start, .stop = cpufreq_sched_stop, + .limits = cpufreq_sched_limits, .owner = THIS_MODULE, }; -- GitLab From bf47bdd1807b2abdcbe989336bdc1da5c2389f29 Mon Sep 17 00:00:00 2001 From: Srinath Sridharan Date: Thu, 14 Jul 2016 09:57:29 +0100 Subject: [PATCH 1175/1442] ANDROID: sched: EAS: take cstate into account when selecting idle core Introduce a new sysctl for this option, 'sched_cstate_aware'. When this is enabled, select_idle_sibling in CFS is modified to choose the idle CPU in the sibling group which has the lowest idle state index - idle state indexes are assumed to increase as sleep depth and hence wakeup latency increase. In this way, we attempt to minimise wakeup latency when an idle CPU is required. Signed-off-by: Srinath Sridharan Includes: sched: EAS: fix select_idle_sibling when sysctl_sched_cstate_aware is enabled, best_idle cpu will not be chosen in the original flow because it will goto done directly Bug: 30107557 Change-Id: Ie09c2e3960cafbb976f8d472747faefab3b4d6ac Signed-off-by: martin_liu Signed-off-by: Andres Oportus --- include/linux/sched/sysctl.h | 1 + kernel/sched/fair.c | 95 ++++++++++++++++++++++++++++-------- kernel/sysctl.c | 7 +++ 3 files changed, 83 insertions(+), 20 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 2d2cf5daca63..abe17fcf8fd0 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -18,6 +18,7 @@ extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; +extern unsigned int sysctl_sched_cstate_aware; enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 34b6bf939335..0523b7e23de1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -51,6 +51,7 @@ unsigned int sysctl_sched_latency = 6000000ULL; unsigned int normalized_sysctl_sched_latency = 6000000ULL; +unsigned int sysctl_sched_cstate_aware = 1; /* * The initial- and re-scaling of tunables is configurable * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) @@ -6132,33 +6133,87 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t static int select_idle_sibling(struct task_struct *p, int prev, int target) { struct sched_domain *sd; - int i; + struct sched_group *sg; + int i = task_cpu(p); + int best_idle = -1; + int best_idle_cstate = -1; + int best_idle_capacity = INT_MAX; - if (idle_cpu(target)) - return target; + if (!sysctl_sched_cstate_aware) { + if (idle_cpu(target)) + return target; - /* - * If the previous cpu is cache affine and idle, don't be stupid. - */ - if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)) - return prev; + /* + * If the prevous cpu is cache affine and idle, don't be stupid. + */ + if (i != target && cpus_share_cache(i, target) && idle_cpu(i)) + return i; - sd = rcu_dereference(per_cpu(sd_llc, target)); - if (!sd) - return target; + sd = rcu_dereference(per_cpu(sd_llc, target)); + if (!sd) + return target; + + i = select_idle_core(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; + + i = select_idle_cpu(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; - i = select_idle_core(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + i = select_idle_smt(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; + } - i = select_idle_cpu(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + /* + * Otherwise, iterate the domains and find an elegible idle cpu. + */ + sd = rcu_dereference(per_cpu(sd_llc, target)); + for_each_lower_domain(sd) { + sg = sd->groups; + do { + if (!cpumask_intersects(sched_group_cpus(sg), + tsk_cpus_allowed(p))) + goto next; + + + if (sysctl_sched_cstate_aware) { + for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) { + struct rq *rq = cpu_rq(i); + int idle_idx = idle_get_state_idx(rq); + unsigned long new_usage = boosted_task_util(p); + unsigned long capacity_orig = capacity_orig_of(i); + if (new_usage > capacity_orig || !idle_cpu(i)) + goto next; + + if (i == target && new_usage <= capacity_curr_of(target)) + return target; + + if (best_idle < 0 || (idle_idx < best_idle_cstate && capacity_orig <= best_idle_capacity)) { + best_idle = i; + best_idle_cstate = idle_idx; + best_idle_capacity = capacity_orig; + } + } + } else { + for_each_cpu(i, sched_group_cpus(sg)) { + if (i == target || !idle_cpu(i)) + goto next; + } - i = select_idle_smt(p, sd, target); - if ((unsigned)i < nr_cpumask_bits) - return i; + target = cpumask_first_and(sched_group_cpus(sg), + tsk_cpus_allowed(p)); + goto done; + } +next: + sg = sg->next; + } while (sg != sd->groups); + } + if (best_idle > 0) + target = best_idle; +done: return target; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bcc012aad7bb..3a3567990388 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -307,6 +307,13 @@ static struct ctl_table kern_table[] = { .extra1 = &min_sched_granularity_ns, .extra2 = &max_sched_granularity_ns, }, + { + .procname = "sched_cstate_aware", + .data = &sysctl_sched_cstate_aware, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "sched_wakeup_granularity_ns", .data = &sysctl_sched_wakeup_granularity, -- GitLab From 1931b93dba7f6bb270df5962787275171664833b Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Fri, 29 Jul 2016 14:04:11 +0100 Subject: [PATCH 1176/1442] ANDROID: sched/fair: add tunable to force selection at cpu granularity EAS assumes that clusters with smaller capacity cores are more energy-efficient. This may not be true on non-big-little devices, so EAS can make incorrect cluster selections when finding a CPU to wake. The "sched_is_big_little" hint can be used to cause a cpu-based selection instead of cluster-based selection. This change incorporates the addition of the sync hint enable patch EAS did not honour synchronous wakeup hints, a new sysctl is created to ask EAS to use this information when selecting a CPU. The control is called "sched_sync_hint_enable". Also contains: EAS: sched/fair: for SMP bias toward idle core with capacity For SMP devices, on wakeup bias towards idle cores that have capacity vs busy devices that need a higher OPP eas: favor idle cpus for boosted tasks BUG: 29533997 BUG: 29512132 Change-Id: I0cc9a1b1b88fb52916f18bf2d25715bdc3634f9c Signed-off-by: Juri Lelli Signed-off-by: Srinath Sridharan eas/sched/fair: Favoring busy cpus with low OPPs BUG: 29533997 BUG: 29512132 Change-Id: I9305b3239698d64278db715a2e277ea0bb4ece79 Signed-off-by: Juri Lelli Signed-off-by: Andres Oportus --- include/linux/sched/sysctl.h | 2 + kernel/sched/fair.c | 189 +++++++++++++++++++++++++++-------- kernel/sysctl.c | 14 +++ 3 files changed, 166 insertions(+), 39 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index abe17fcf8fd0..d2a69b5540d1 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -18,6 +18,8 @@ extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; +extern unsigned int sysctl_sched_is_big_little; +extern unsigned int sysctl_sched_sync_hint_enable; extern unsigned int sysctl_sched_cstate_aware; enum sched_tunable_scaling { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0523b7e23de1..8128ccd05bad 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -51,7 +51,10 @@ unsigned int sysctl_sched_latency = 6000000ULL; unsigned int normalized_sysctl_sched_latency = 6000000ULL; +unsigned int sysctl_sched_is_big_little = 0; +unsigned int sysctl_sched_sync_hint_enable = 1; unsigned int sysctl_sched_cstate_aware = 1; + /* * The initial- and re-scaling of tunables is configurable * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) @@ -6217,7 +6220,97 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } -static int energy_aware_wake_cpu(struct task_struct *p, int target) +static inline int find_best_target(struct task_struct *p) +{ + int i, boosted; + int target_cpu = -1; + int target_capacity = 0; + int backup_capacity = 0; + int idle_cpu = -1; + int best_idle_cstate = INT_MAX; + int backup_cpu = -1; + unsigned long task_util_boosted, new_util; + + /* + * Favor 1) busy cpu with most capacity at current OPP + * 2) idle_cpu with capacity at current OPP + * 3) busy cpu with capacity at higher OPP + */ +#ifdef CONFIG_CGROUP_SCHEDTUNE + boosted = schedtune_task_boost(p); +#else + boosted = 0; +#endif + task_util_boosted = boosted_task_util(p); + for_each_cpu(i, tsk_cpus_allowed(p)) { + int cur_capacity = capacity_curr_of(i); + struct rq *rq = cpu_rq(i); + int idle_idx = idle_get_state_idx(rq); + + /* + * p's blocked utilization is still accounted for on prev_cpu + * so prev_cpu will receive a negative bias due to the double + * accounting. However, the blocked utilization may be zero. + */ + new_util = cpu_util(i) + task_util_boosted; + + /* + * Ensure minimum capacity to grant the required boost. + * The target CPU can be already at a capacity level higher + * than the one required to boost the task. + */ + + if (new_util > capacity_orig_of(i)) + continue; + + /* + * For boosted tasks we favor idle cpus unconditionally to + * improve latency. + */ + if (idle_idx >= 0 && boosted) { + if (idle_cpu < 0 || + (sysctl_sched_cstate_aware && + best_idle_cstate > idle_idx)) { + best_idle_cstate = idle_idx; + idle_cpu = i; + } + continue; + } + + if (new_util < cur_capacity) { + if (cpu_rq(i)->nr_running) { + if (target_capacity == 0 || + target_capacity > cur_capacity) { + /* busy CPU with most capacity at current OPP */ + target_cpu = i; + target_capacity = cur_capacity; + } + } else if (!boosted) { + if (idle_cpu < 0 || + (sysctl_sched_cstate_aware && + best_idle_cstate > idle_idx)) { + best_idle_cstate = idle_idx; + idle_cpu = i; + } + } + } else if (backup_capacity == 0 || + backup_capacity > cur_capacity) { + /* first busy CPU with capacity at higher OPP */ + backup_capacity = cur_capacity; + backup_cpu = i; + } + } + + if (!boosted && target_cpu < 0) { + target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu; + } + + if (boosted && idle_cpu >= 0) + target_cpu = idle_cpu; + return target_cpu; +} + +static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync) { struct sched_domain *sd; struct sched_group *sg, *sg_target; @@ -6225,6 +6318,14 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target) int target_cpu = task_cpu(p); int i; + if (sysctl_sched_sync_hint_enable && sync) { + int cpu = smp_processor_id(); + cpumask_t search_cpus; + cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); + if (cpumask_test_cpu(cpu, &search_cpus)) + return cpu; + } + sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p))); if (!sd) @@ -6233,50 +6334,60 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target) sg = sd->groups; sg_target = sg; - /* - * Find group with sufficient capacity. We only get here if no cpu is - * overutilized. We may end up overutilizing a cpu by adding the task, - * but that should not be any worse than select_idle_sibling(). - * load_balance() should sort it out later as we get above the tipping - * point. - */ - do { - /* Assuming all cpus are the same in group */ - int max_cap_cpu = group_first_cpu(sg); + if (sysctl_sched_is_big_little) { /* - * Assume smaller max capacity means more energy-efficient. - * Ideally we should query the energy model for the right - * answer but it easily ends up in an exhaustive search. + * Find group with sufficient capacity. We only get here if no cpu is + * overutilized. We may end up overutilizing a cpu by adding the task, + * but that should not be any worse than select_idle_sibling(). + * load_balance() should sort it out later as we get above the tipping + * point. */ - if (capacity_of(max_cap_cpu) < target_max_cap && - task_fits_max(p, max_cap_cpu)) { - sg_target = sg; - target_max_cap = capacity_of(max_cap_cpu); - } - } while (sg = sg->next, sg != sd->groups); + do { + /* Assuming all cpus are the same in group */ + int max_cap_cpu = group_first_cpu(sg); - /* Find cpu with sufficient capacity */ - for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) { - /* - * p's blocked utilization is still accounted for on prev_cpu - * so prev_cpu will receive a negative bias due to the double - * accounting. However, the blocked utilization may be zero. - */ - int new_util = cpu_util(i) + boosted_task_util(p); + /* + * Assume smaller max capacity means more energy-efficient. + * Ideally we should query the energy model for the right + * answer but it easily ends up in an exhaustive search. + */ + if (capacity_of(max_cap_cpu) < target_max_cap && + task_fits_max(p, max_cap_cpu)) { + sg_target = sg; + target_max_cap = capacity_of(max_cap_cpu); + } + } while (sg = sg->next, sg != sd->groups); - if (new_util > capacity_orig_of(i)) - continue; + /* Find cpu with sufficient capacity */ + for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) { + /* + * p's blocked utilization is still accounted for on prev_cpu + * so prev_cpu will receive a negative bias due to the double + * accounting. However, the blocked utilization may be zero. + */ + int new_util = cpu_util(i) + boosted_task_util(p); - if (new_util < capacity_curr_of(i)) { - target_cpu = i; - if (cpu_rq(i)->nr_running) - break; - } + if (new_util > capacity_orig_of(i)) + continue; + + if (new_util < capacity_curr_of(i)) { + target_cpu = i; + if (cpu_rq(i)->nr_running) + break; + } - /* cpu has capacity at higher OPP, keep it as fallback */ - if (target_cpu == task_cpu(p)) - target_cpu = i; + /* cpu has capacity at higher OPP, keep it as fallback */ + if (target_cpu == task_cpu(p)) + target_cpu = i; + } + } else { + /* + * Find a cpu with sufficient capacity + */ + int tmp_target = find_best_target(p); + if (tmp_target >= 0) + target_cpu = tmp_target; } if (target_cpu != task_cpu(p)) { @@ -6355,7 +6466,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (!sd) { if (energy_aware() && !cpu_rq(cpu)->rd->overutilized) - new_cpu = energy_aware_wake_cpu(p, prev_cpu); + new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync); else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3a3567990388..a025ba15a09a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -307,6 +307,20 @@ static struct ctl_table kern_table[] = { .extra1 = &min_sched_granularity_ns, .extra2 = &max_sched_granularity_ns, }, + { + .procname = "sched_is_big_little", + .data = &sysctl_sched_is_big_little, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_sync_hint_enable", + .data = &sysctl_sched_sync_hint_enable, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "sched_cstate_aware", .data = &sysctl_sched_cstate_aware, -- GitLab From c6a6f3bfa01a363136b209d7f6193fd2d7a0524c Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 11 Mar 2016 16:44:16 -0800 Subject: [PATCH 1177/1442] ANDROID: sched/fair: add tunable to set initial task load The choice of initial task load upon fork has a large influence on CPU and OPP selection when scheduler-driven DVFS is in use. Make this tuneable by adding a new sysctl "sched_initial_task_util". If the sched governor is not used, the default remains at SCHED_LOAD_SCALE Otherwise, the value from the sysctl is used. This defaults to 0. Signed-off-by: "Todd Kjos " Signed-off-by: Andres Oportus --- include/linux/sched/sysctl.h | 1 + kernel/sched/fair.c | 6 +++++- kernel/sysctl.c | 7 +++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d2a69b5540d1..02757437db29 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -20,6 +20,7 @@ extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_is_big_little; extern unsigned int sysctl_sched_sync_hint_enable; +extern unsigned int sysctl_sched_initial_task_util; extern unsigned int sysctl_sched_cstate_aware; enum sched_tunable_scaling { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8128ccd05bad..f0dec981aca7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -53,6 +53,7 @@ unsigned int normalized_sysctl_sched_latency = 6000000ULL; unsigned int sysctl_sched_is_big_little = 0; unsigned int sysctl_sched_sync_hint_enable = 1; +unsigned int sysctl_sched_initial_task_util = 0; unsigned int sysctl_sched_cstate_aware = 1; /* @@ -704,10 +705,13 @@ void init_entity_runnable_average(struct sched_entity *se) if (entity_is_task(se)) sa->load_avg = scale_load_down(se->load.weight); sa->load_sum = sa->load_avg * LOAD_AVG_MAX; + /* * At this point, util_avg won't be used in select_task_rq_fair anyway */ - sa->util_avg = 0; + sa->util_avg = sched_freq() ? + sysctl_sched_initial_task_util : + 0; sa->util_sum = 0; /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */ } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a025ba15a09a..2ed4bf05045e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -321,6 +321,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sched_initial_task_util", + .data = &sysctl_sched_initial_task_util, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "sched_cstate_aware", .data = &sysctl_sched_cstate_aware, -- GitLab From 4a5fe5430cad90490788e9dcb6d08f4fc53a3cc8 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 28 Jul 2016 16:39:27 +0100 Subject: [PATCH 1178/1442] ANDROID: FIX: sched/tune: update usage of boosted task utilisation on CPU selection A boosted task needs to be scheduled on a CPU which can grant a minimum capacity which is higher than its utilization. However, a task can be allocated on a CPU which already provides an utilization which is higher than the task boosted utilization itself. Moreover, with the previous approach a task 100% boosted is not fitting any CPU. This patch makes use of the boosted task utilization just as a threashold which defines the minimum capacity should be available on a CPU to host that task. Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f0dec981aca7..c8715b5b27d9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6320,6 +6320,7 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync) struct sched_group *sg, *sg_target; int target_max_cap = INT_MAX; int target_cpu = task_cpu(p); + unsigned long task_util_boosted, new_util; int i; if (sysctl_sched_sync_hint_enable && sync) { @@ -6363,6 +6364,7 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync) } } while (sg = sg->next, sg != sd->groups); + task_util_boosted = boosted_task_util(p); /* Find cpu with sufficient capacity */ for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) { /* @@ -6370,8 +6372,13 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync) * so prev_cpu will receive a negative bias due to the double * accounting. However, the blocked utilization may be zero. */ - int new_util = cpu_util(i) + boosted_task_util(p); + new_util = cpu_util(i) + task_util_boosted; + /* + * Ensure minimum capacity to grant the required boost. + * The target CPU can be already at a capacity level higher + * than the one required to boost the task. + */ if (new_util > capacity_orig_of(i)) continue; -- GitLab From c5b2042fbcb787a5fdf2f8ed37651f706c9584ac Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 29 Jul 2016 15:45:57 +0100 Subject: [PATCH 1179/1442] ANDROID: FIX: sched/tune: move schedtune_nornalize_energy into fair.c The energy normalization function is required to get the proper values for the P-E space filtering function to work. That normalization is part of the hot wakeup path and currently implemented with a function call. Moving the normalization function into fair.c allows the compiler to further optimize that code by reducing overheads in the wakeup hot path. Signed-off-by: Patrick Bellasi [jstultz: fwdported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 121 ++++++++++++++++++++++++++++---------------- kernel/sched/tune.c | 42 +-------------- kernel/sched/tune.h | 12 ++++- 3 files changed, 91 insertions(+), 84 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c8715b5b27d9..3419d8651727 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5431,44 +5431,6 @@ static inline bool cpu_in_sg(struct sched_group *sg, int cpu) return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg)); } -#ifdef CONFIG_SCHED_TUNE -static int energy_diff_evaluate(struct energy_env *eenv) -{ - unsigned int boost; - int nrg_delta; - - /* Return energy diff when boost margin is 0 */ -#ifdef CONFIG_CGROUP_SCHEDTUNE - boost = schedtune_task_boost(eenv->task); -#else - boost = get_sysctl_sched_cfs_boost(); -#endif - if (boost == 0) - return eenv->nrg.diff; - - /* Compute normalized energy diff */ - nrg_delta = schedtune_normalize_energy(eenv->nrg.diff); - eenv->nrg.delta = nrg_delta; - - eenv->payoff = schedtune_accept_deltas( - eenv->nrg.delta, - eenv->cap.delta, - eenv->task); - - /* - * When SchedTune is enabled, the energy_diff() function will return - * the computed energy payoff value. Since the energy_diff() return - * value is expected to be negative by its callers, this evaluation - * function return a negative value each time the evaluation return a - * positive payoff, which is the condition for the acceptance of - * a scheduling decision - */ - return -eenv->payoff; -} -#else /* CONFIG_SCHED_TUNE */ -#define energy_diff_evaluate(eenv) eenv->nrg.diff -#endif - /* * energy_diff(): Estimate the energy impact of changing the utilization * distribution. eenv specifies the change: utilisation amount, source, and @@ -5476,12 +5438,11 @@ static int energy_diff_evaluate(struct energy_env *eenv) * utilization is removed from or added to the system (e.g. task wake-up). If * both are specified, the utilization is migrated. */ -static int energy_diff(struct energy_env *eenv) +static inline int __energy_diff(struct energy_env *eenv) { struct sched_domain *sd; struct sched_group *sg; int sd_cpu = -1, energy_before = 0, energy_after = 0; - int result; struct energy_env eenv_before = { .util_delta = 0, @@ -5525,16 +5486,90 @@ static int energy_diff(struct energy_env *eenv) eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before; eenv->payoff = 0; - result = energy_diff_evaluate(eenv); - trace_sched_energy_diff(eenv->task, eenv->src_cpu, eenv->dst_cpu, eenv->util_delta, eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff, eenv->cap.before, eenv->cap.after, eenv->cap.delta, eenv->nrg.delta, eenv->payoff); - return result; + return eenv->nrg.diff; +} + +#ifdef CONFIG_SCHED_TUNE + +struct target_nrg schedtune_target_nrg; + +/* + * System energy normalization + * Returns the normalized value, in the range [0..SCHED_LOAD_SCALE], + * corresponding to the specified energy variation. + */ +static inline int +normalize_energy(int energy_diff) +{ + u32 normalized_nrg; +#ifdef CONFIG_SCHED_DEBUG + int max_delta; + + /* Check for boundaries */ + max_delta = schedtune_target_nrg.max_power; + max_delta -= schedtune_target_nrg.min_power; + WARN_ON(abs(energy_diff) >= max_delta); +#endif + + /* Do scaling using positive numbers to increase the range */ + normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff; + + /* Scale by energy magnitude */ + normalized_nrg <<= SCHED_CAPACITY_SHIFT; + + /* Normalize on max energy for target platform */ + normalized_nrg = reciprocal_divide( + normalized_nrg, schedtune_target_nrg.rdiv); + + return (energy_diff < 0) ? -normalized_nrg : normalized_nrg; +} + +static inline int +energy_diff(struct energy_env *eenv) +{ + unsigned int boost; + int nrg_delta; + + /* Conpute "absolute" energy diff */ + __energy_diff(eenv); + + /* Return energy diff when boost margin is 0 */ +#ifdef CONFIG_CGROUP_SCHEDTUNE + boost = schedtune_task_boost(eenv->task); +#else + boost = get_sysctl_sched_cfs_boost(); +#endif + if (boost == 0) + return eenv->nrg.diff; + + /* Compute normalized energy diff */ + nrg_delta = normalize_energy(eenv->nrg.diff); + eenv->nrg.delta = nrg_delta; + + eenv->payoff = schedtune_accept_deltas( + eenv->nrg.delta, + eenv->cap.delta, + eenv->task); + + /* + * When SchedTune is enabled, the energy_diff() function will return + * the computed energy payoff value. Since the energy_diff() return + * value is expected to be negative by its callers, this evaluation + * function return a negative value each time the evaluation return a + * positive payoff, which is the condition for the acceptance of + * a scheduling decision + */ + return -eenv->payoff; } +#else /* CONFIG_SCHED_TUNE */ +#define energy_diff(eenv) __energy_diff(eenv) +#endif /* * Detect M:N waker/wakee relationships via a switching-frequency heuristic. diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 4b385845c958..3a8784eedaf4 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -3,24 +3,17 @@ #include #include #include -#include #include #include #include #include "sched.h" +#include "tune.h" unsigned int sysctl_sched_cfs_boost __read_mostly; -/* - * System energy normalization constants - */ -static struct target_nrg { - unsigned long min_power; - unsigned long max_power; - struct reciprocal_value rdiv; -} schedtune_target_nrg; +extern struct target_nrg schedtune_target_nrg; /* Performance Boost region (B) threshold params */ static int perf_boost_idx; @@ -587,37 +580,6 @@ sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write, return 0; } -/* - * System energy normalization - * Returns the normalized value, in the range [0..SCHED_LOAD_SCALE], - * corresponding to the specified energy variation. - */ -int -schedtune_normalize_energy(int energy_diff) -{ - u32 normalized_nrg; - int max_delta; - -#ifdef CONFIG_SCHED_DEBUG - /* Check for boundaries */ - max_delta = schedtune_target_nrg.max_power; - max_delta -= schedtune_target_nrg.min_power; - WARN_ON(abs(energy_diff) >= max_delta); -#endif - - /* Do scaling using positive numbers to increase the range */ - normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff; - - /* Scale by energy magnitude */ - normalized_nrg <<= SCHED_CAPACITY_SHIFT; - - /* Normalize on max energy for target platform */ - normalized_nrg = reciprocal_divide( - normalized_nrg, schedtune_target_nrg.rdiv); - - return (energy_diff < 0) ? -normalized_nrg : normalized_nrg; -} - #ifdef CONFIG_SCHED_DEBUG static void schedtune_test_nrg(unsigned long delta_pwr) diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h index f7273a5d994a..7d2aa7951554 100644 --- a/kernel/sched/tune.h +++ b/kernel/sched/tune.h @@ -1,6 +1,17 @@ #ifdef CONFIG_SCHED_TUNE +#include + +/* + * System energy normalization constants + */ +struct target_nrg { + unsigned long min_power; + unsigned long max_power; + struct reciprocal_value rdiv; +}; + #ifdef CONFIG_CGROUP_SCHEDTUNE int schedtune_cpu_boost(int cpu); @@ -25,7 +36,6 @@ int schedtune_accept_deltas(int nrg_delta, int cap_delta, #define schedtune_enqueue_task(task, cpu) do { } while (0) #define schedtune_dequeue_task(task, cpu) do { } while (0) -#define schedtune_normalize_energy(energy) energy #define schedtune_accept_deltas(nrg_delta, cap_delta, task) nrg_delta #endif /* CONFIG_SCHED_TUNE */ -- GitLab From e71c4255169ddacf3472f09b0c7fbe0fe808214e Mon Sep 17 00:00:00 2001 From: Srinath Sridharan Date: Thu, 28 Jul 2016 17:28:55 +0100 Subject: [PATCH 1180/1442] ANDROID: sched/tune: Add support for negative boost values Change-Id: I164ee04ba98c3a776605f18cb65ee61b3e917939 Contains also: eas/stune: schedtune cpu boost_max must be non-negative. This is to avoid under-accounting cpu capacity which may cause task stacking and frequency spikes. Change-Id: Ie1c1cbd52a6edb77b4c15a830030aa748dff6f29 Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 20 ++++++++++---------- kernel/sched/fair.c | 36 +++++++++++++++++++++--------------- kernel/sched/tune.c | 25 ++++++++++++++++--------- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index a8864dc58405..28215ae31eab 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -706,14 +706,14 @@ TRACE_EVENT(sched_tune_config, */ TRACE_EVENT(sched_boost_cpu, - TP_PROTO(int cpu, unsigned long util, unsigned long margin), + TP_PROTO(int cpu, unsigned long util, long margin), TP_ARGS(cpu, util, margin), TP_STRUCT__entry( __field( int, cpu ) __field( unsigned long, util ) - __field( unsigned long, margin ) + __field(long, margin ) ), TP_fast_assign( @@ -722,7 +722,7 @@ TRACE_EVENT(sched_boost_cpu, __entry->margin = margin; ), - TP_printk("cpu=%d util=%lu margin=%lu", + TP_printk("cpu=%d util=%lu margin=%ld", __entry->cpu, __entry->util, __entry->margin) @@ -734,7 +734,7 @@ TRACE_EVENT(sched_boost_cpu, TRACE_EVENT(sched_tune_tasks_update, TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx, - unsigned int boost, unsigned int max_boost), + int boost, int max_boost), TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost), @@ -744,8 +744,8 @@ TRACE_EVENT(sched_tune_tasks_update, __field( int, cpu ) __field( int, tasks ) __field( int, idx ) - __field( unsigned int, boost ) - __field( unsigned int, max_boost ) + __field( int, boost ) + __field( int, max_boost ) ), TP_fast_assign( @@ -759,7 +759,7 @@ TRACE_EVENT(sched_tune_tasks_update, ), TP_printk("pid=%d comm=%s " - "cpu=%d tasks=%d idx=%d boost=%u max_boost=%u", + "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d", __entry->pid, __entry->comm, __entry->cpu, __entry->tasks, __entry->idx, __entry->boost, __entry->max_boost) @@ -795,7 +795,7 @@ TRACE_EVENT(sched_tune_boostgroup_update, */ TRACE_EVENT(sched_boost_task, - TP_PROTO(struct task_struct *tsk, unsigned long util, unsigned long margin), + TP_PROTO(struct task_struct *tsk, unsigned long util, long margin), TP_ARGS(tsk, util, margin), @@ -803,7 +803,7 @@ TRACE_EVENT(sched_boost_task, __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( unsigned long, util ) - __field( unsigned long, margin ) + __field( long, margin ) ), @@ -814,7 +814,7 @@ TRACE_EVENT(sched_boost_task, __entry->margin = margin; ), - TP_printk("comm=%s pid=%d util=%lu margin=%lu", + TP_printk("comm=%s pid=%d util=%lu margin=%ld", __entry->comm, __entry->pid, __entry->util, __entry->margin) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3419d8651727..95a10bf0675c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5709,22 +5709,26 @@ static bool cpu_overutilized(int cpu) #ifdef CONFIG_SCHED_TUNE -static unsigned long -schedtune_margin(unsigned long signal, unsigned long boost) +static long +schedtune_margin(unsigned long signal, long boost) { - unsigned long long margin = 0; + long long margin = 0; /* * Signal proportional compensation (SPC) * * The Boost (B) value is used to compute a Margin (M) which is * proportional to the complement of the original Signal (S): - * M = B * (SCHED_LOAD_SCALE - S) + * M = B * (SCHED_LOAD_SCALE - S), if B is positive + * M = B * S, if B is negative * The obtained M could be used by the caller to "boost" S. */ - margin = SCHED_CAPACITY_SCALE - signal; - margin *= boost; + if (boost >= 0) { + margin = SCHED_CAPACITY_SCALE - signal; + margin *= boost; + } else + margin = -signal * boost; /* * Fast integer division by constant: * Constant : (C) = 100 @@ -5740,13 +5744,15 @@ schedtune_margin(unsigned long signal, unsigned long boost) margin *= 1311; margin >>= 17; + if (boost < 0) + margin *= -1; return margin; } -static inline unsigned int +static inline int schedtune_cpu_margin(unsigned long util, int cpu) { - unsigned int boost; + int boost; #ifdef CONFIG_CGROUP_SCHEDTUNE boost = schedtune_cpu_boost(cpu); @@ -5759,12 +5765,12 @@ schedtune_cpu_margin(unsigned long util, int cpu) return schedtune_margin(util, boost); } -static inline unsigned long +static inline long schedtune_task_margin(struct task_struct *task) { - unsigned int boost; + int boost; unsigned long util; - unsigned long margin; + long margin; #ifdef CONFIG_CGROUP_SCHEDTUNE boost = schedtune_task_boost(task); @@ -5782,13 +5788,13 @@ schedtune_task_margin(struct task_struct *task) #else /* CONFIG_SCHED_TUNE */ -static inline unsigned int +static inline int schedtune_cpu_margin(unsigned long util, int cpu) { return 0; } -static inline unsigned int +static inline int schedtune_task_margin(struct task_struct *task) { return 0; @@ -5800,7 +5806,7 @@ static inline unsigned long boosted_cpu_util(int cpu) { unsigned long util = cpu_util(cpu); - unsigned long margin = schedtune_cpu_margin(util, cpu); + long margin = schedtune_cpu_margin(util, cpu); trace_sched_boost_cpu(cpu, util, margin); @@ -5811,7 +5817,7 @@ static inline unsigned long boosted_task_util(struct task_struct *task) { unsigned long util = task_util(task); - unsigned long margin = schedtune_task_margin(task); + long margin = schedtune_task_margin(task); trace_sched_boost_task(task, util, margin); diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 3a8784eedaf4..9f53d9fb5df1 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -213,10 +213,11 @@ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = { */ struct boost_groups { /* Maximum boost value for all RUNNABLE tasks on a CPU */ - unsigned boost_max; + bool idle; + int boost_max; struct { /* The boost for tasks on that boost group */ - unsigned boost; + int boost; /* Count of RUNNABLE tasks on that boost group */ unsigned tasks; } group[BOOSTGROUPS_COUNT]; @@ -229,7 +230,7 @@ static void schedtune_cpu_update(int cpu) { struct boost_groups *bg; - unsigned boost_max; + int boost_max; int idx; bg = &per_cpu(cpu_boost_groups, cpu); @@ -243,9 +244,13 @@ schedtune_cpu_update(int cpu) */ if (bg->group[idx].tasks == 0) continue; + boost_max = max(boost_max, bg->group[idx].boost); } - + /* Ensures boost_max is non-negative when all cgroup boost values + * are neagtive. Avoids under-accounting of cpu capacity which may cause + * task stacking and frequency spikes.*/ + boost_max = max(boost_max, 0); bg->boost_max = boost_max; } @@ -391,7 +396,7 @@ int schedtune_task_boost(struct task_struct *p) return task_boost; } -static u64 +static s64 boost_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct schedtune *st = css_st(css); @@ -401,11 +406,13 @@ boost_read(struct cgroup_subsys_state *css, struct cftype *cft) static int boost_write(struct cgroup_subsys_state *css, struct cftype *cft, - u64 boost) + s64 boost) { struct schedtune *st = css_st(css); + unsigned threshold_idx; + int boost_pct; - if (boost < 0 || boost > 100) + if (boost < -100 || boost > 100) return -EINVAL; st->boost = boost; @@ -423,8 +430,8 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, static struct cftype files[] = { { .name = "boost", - .read_u64 = boost_read, - .write_u64 = boost_write, + .read_s64 = boost_read, + .write_s64 = boost_write, }, { } /* terminate */ }; -- GitLab From 2ed513e7b51eb6f6cacc581c4026677ff33f4cbe Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 28 Jul 2016 17:38:25 +0100 Subject: [PATCH 1181/1442] ANDROID: FIXUP: sched/tune: fix payoff calculation for boost region The definition of the acceptance regions as well as the translation of these regions into a payoff value was both wrong which turned out in: a) a wrong definition of payoff for the performance boost region b) a correct "by chance" definition of the payoff for the performance constraint region (i.e. two sign errors together fixing the formula) This patch provides a better description of the cut regions as well as a fixed version of the payoff computations, which are now reduced to a single formula usable for both cases. Reported-by: Leo Yan Reviewed-by: Leo Yan Signed-off-by: Leo Yan Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 77 +++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 9f53d9fb5df1..353a8f62af58 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -51,50 +51,51 @@ __schedtune_accept_deltas(int nrg_delta, int cap_delta, int perf_boost_idx, int perf_constrain_idx) { int payoff = -INT_MAX; + int gain_idx = -1; /* Performance Boost (B) region */ - if (nrg_delta > 0 && cap_delta > 0) { - /* - * Evaluate "Performance Boost" vs "Energy Increase" - * payoff criteria: - * cap_delta / nrg_delta < cap_gain / nrg_gain - * which is: - * nrg_delta * cap_gain > cap_delta * nrg_gain - */ - payoff = nrg_delta * threshold_gains[perf_boost_idx].cap_gain; - payoff -= cap_delta * threshold_gains[perf_boost_idx].nrg_gain; - - trace_sched_tune_filter( - nrg_delta, cap_delta, - threshold_gains[perf_boost_idx].nrg_gain, - threshold_gains[perf_boost_idx].cap_gain, - payoff, 8); - - return payoff; - } - + if (nrg_delta >= 0 && cap_delta > 0) + gain_idx = perf_boost_idx; /* Performance Constraint (C) region */ - if (nrg_delta < 0 && cap_delta < 0) { - /* - * Evaluate "Performance Boost" vs "Energy Increase" - * payoff criteria: - * cap_delta / nrg_delta > cap_gain / nrg_gain - * which is: - * cap_delta * nrg_gain > nrg_delta * cap_gain - */ - payoff = cap_delta * threshold_gains[perf_constrain_idx].nrg_gain; - payoff -= nrg_delta * threshold_gains[perf_constrain_idx].cap_gain; - - trace_sched_tune_filter( - nrg_delta, cap_delta, - threshold_gains[perf_constrain_idx].nrg_gain, - threshold_gains[perf_constrain_idx].cap_gain, - payoff, 6); + else if (nrg_delta < 0 && cap_delta <= 0) + gain_idx = perf_constrain_idx; + /* Default: reject schedule candidate */ + if (gain_idx == -1) return payoff; - } - /* Default: reject schedule candidate */ + /* + * Evaluate "Performance Boost" vs "Energy Increase" + * + * - Performance Boost (B) region + * + * Condition: nrg_delta > 0 && cap_delta > 0 + * Payoff criteria: + * cap_gain / nrg_gain < cap_delta / nrg_delta = + * cap_gain * nrg_delta < cap_delta * nrg_gain + * Note that since both nrg_gain and nrg_delta are positive, the + * inequality does not change. Thus: + * + * payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta) + * + * - Performance Constraint (C) region + * + * Condition: nrg_delta < 0 && cap_delta < 0 + * payoff criteria: + * cap_gain / nrg_gain > cap_delta / nrg_delta = + * cap_gain * nrg_delta < cap_delta * nrg_gain + * Note that since nrg_gain > 0 while nrg_delta < 0, the + * inequality change. Thus: + * + * payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta) + * + * This means that, in case of same positive defined {cap,nrg}_gain + * for both the B and C regions, we can use the same payoff formula + * where a positive value represents the accept condition. + */ + payoff = cap_delta * threshold_gains[gain_idx].nrg_gain; + payoff -= nrg_delta * threshold_gains[gain_idx].cap_gain; + return payoff; } -- GitLab From 52cb67ebbc228853b459c7fa2704c516e567be3c Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 29 Jul 2016 15:19:41 +0100 Subject: [PATCH 1182/1442] ANDROID: sched/tune: use a single initialisation function With the introduction of initialization function required to compute the energy normalization constants from DTB at boot time, we have now a late_initcall which is already used by SchedTune. This patch consolidate within that function the other initialization bits which was previously deferred to the first CGroup creation. Signed-off-by: Patrick Bellasi [jstultz: fwdported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 50 +++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 353a8f62af58..55a6bb36f2ca 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -410,8 +410,6 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, s64 boost) { struct schedtune *st = css_st(css); - unsigned threshold_idx; - int boost_pct; if (boost < -100 || boost > 100) return -EINVAL; @@ -456,33 +454,14 @@ schedtune_boostgroup_init(struct schedtune *st) return 0; } -static int -schedtune_init(void) -{ - struct boost_groups *bg; - int cpu; - - /* Initialize the per CPU boost groups */ - for_each_possible_cpu(cpu) { - bg = &per_cpu(cpu_boost_groups, cpu); - memset(bg, 0, sizeof(struct boost_groups)); - } - - pr_info(" schedtune configured to support %d boost groups\n", - BOOSTGROUPS_COUNT); - return 0; -} - static struct cgroup_subsys_state * schedtune_css_alloc(struct cgroup_subsys_state *parent_css) { struct schedtune *st; int idx; - if (!parent_css) { - schedtune_init(); + if (!parent_css) return &root_schedtune.css; - } /* Allow only single level hierachies */ if (parent_css != &root_schedtune.css) { @@ -543,6 +522,22 @@ struct cgroup_subsys schedtune_cgrp_subsys = { .early_init = 1, }; +static inline void +schedtune_init_cgroups(void) +{ + struct boost_groups *bg; + int cpu; + + /* Initialize the per CPU boost groups */ + for_each_possible_cpu(cpu) { + bg = &per_cpu(cpu_boost_groups, cpu); + memset(bg, 0, sizeof(struct boost_groups)); + } + + pr_info("schedtune: configured to support %d boost groups\n", + BOOSTGROUPS_COUNT); +} + #else /* CONFIG_CGROUP_SCHEDTUNE */ int @@ -690,7 +685,7 @@ schedtune_add_cluster_nrg( * that bind the EM to the topology information. */ static int -schedtune_init_late(void) +schedtune_init(void) { struct target_nrg *ste = &schedtune_target_nrg; unsigned long delta_pwr = 0; @@ -730,10 +725,17 @@ schedtune_init_late(void) ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2); schedtune_test_nrg(delta_pwr); + +#ifdef CONFIG_CGROUP_SCHEDTUNE + schedtune_init_cgroups(); +#else + pr_info("schedtune: configured to support global boosting only\n"); +#endif + return 0; nodata: rcu_read_unlock(); return -EINVAL; } -late_initcall(schedtune_init_late); +late_initcall(schedtune_init); -- GitLab From d248900606d3174c301c651ff6ca98c2a5dd9a28 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 28 Jul 2016 18:44:40 +0100 Subject: [PATCH 1183/1442] ANDROID: FIXUP: sched/tune: fix accounting for runnable tasks Contains: sched/tune: fix accounting for runnable tasks (1/5) The accounting for tasks into boost groups of different CPUs is currently broken mainly because: a) we do not properly track the change of boost group of a RUNNABLE task b) there are race conditions between migration code and accounting code This patch provides a fixes to ensure enqueue/dequeue accounting also for throttled tasks. Without this patch is can happen that a task is enqueued into a throttled RQ thus not being accounted for the boosting of the corresponding RQ. We could argue that a throttled task should not boost a CPU, however: a) properly implementing CPU boosting considering throttled tasks will increase a lot the complexity of the solution b) it's not easy to quantify the benefits introduced by such a more complex solution Since task throttling requires the usage of the CFS bandwidth controller, which is not widely used on mobile systems (at least not by Android kernels so far), for the time being we go for the simple solution and boost also for throttled RQs. sched/tune: fix accounting for runnable tasks (2/5) This patch provides the code required to enforce proper locking. A per boost group spinlock has been added to grant atomic accounting of tasks as well as to serialise enqueue/dequeue operations, triggered by tasks migrations, with cgroups's attach/detach operations. sched/tune: fix accounting for runnable tasks (3/5) This patch adds cgroups {allow,can,cancel}_attach callbacks. Since a task can be migrated between boost groups while it's running, the CGroups's attach callbacks have been added to properly migrate boost contributions of RUNNABLE tasks. The RQ's lock is used to serialise enqueue/dequeue operations, triggered by tasks migrations, with cgroups's attach/detach operations. While the SchedTune's CPU lock is used to grant atrocity of the accounting within the CPU. NOTE: the current implementation does not allows a concurrent CPU migration and CGroups change. sched/tune: fix accounting for runnable tasks (4/5) This fixes accounting for exiting tasks by adding a dedicated call early in the do_exit() syscall, which disables SchedTune accounting as soon as a task is flagged PF_EXITING. This flag is set before the multiple dequeue/enqueue dance triggered by cgroup_exit() which is useful only to inject useless tasks movements thus increasing possibilities for race conditions with the migration code. The schedtune_exit_task() call does the last dequeue of a task from its current boost group. This is a solution more aligned with what happens in mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying task to the root control group. sched/tune: fix accounting for runnable tasks (5/5) To avoid accounting issues at startup, this patch disable the SchedTune accounting until the required data structures have been properly initialized. Signed-off-by: Patrick Bellasi [jstultz: fwdported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- kernel/exit.c | 5 ++ kernel/sched/core.c | 12 +++ kernel/sched/fair.c | 11 ++- kernel/sched/sched.h | 3 + kernel/sched/tune.c | 178 ++++++++++++++++++++++++++++++++++++++----- kernel/sched/tune.h | 12 +++ 6 files changed, 199 insertions(+), 22 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 3076f3089919..46a7c2bafda2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -55,6 +55,8 @@ #include #include +#include "sched/tune.h" + #include #include #include @@ -775,6 +777,9 @@ void __noreturn do_exit(long code) } exit_signals(tsk); /* sets PF_EXITING */ + + schedtune_exit_task(tsk); + /* * Ensure that all new tsk->pi_lock acquisitions must observe * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4d0c64608454..b6126506dad2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -156,6 +156,18 @@ int sysctl_sched_rt_runtime = 950000; /* cpus with isolated domains */ cpumask_var_t cpu_isolated_map; +struct rq * +lock_rq_of(struct task_struct *p, struct rq_flags *flags) +{ + return task_rq_lock(p, flags); +} + +void +unlock_rq_of(struct rq *rq, struct task_struct *p, struct rq_flags *flags) +{ + task_rq_unlock(rq, p, flags); +} + /* * this_rq_lock - lock this runqueue and disable interrupts. */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 95a10bf0675c..225d106a4dab 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4629,8 +4629,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cpu_overutilized(rq->cpu)) rq->rd->overutilized = true; - schedtune_enqueue_task(p, cpu_of(rq)); - /* * We want to potentially trigger a freq switch * request only for tasks that are waking up; this is @@ -4641,6 +4639,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (task_new || task_wakeup) update_capacity_of(cpu_of(rq)); } + + /* Update SchedTune accouting */ + schedtune_enqueue_task(p, cpu_of(rq)); + #endif /* CONFIG_SMP */ hrtick_update(rq); @@ -4705,7 +4707,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) #ifdef CONFIG_SMP if (!se) { - schedtune_dequeue_task(p, cpu_of(rq)); /* * We want to potentially trigger a freq switch @@ -4723,6 +4724,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) } } + /* Update SchedTune accouting */ + schedtune_dequeue_task(p, cpu_of(rq)); + #endif /* CONFIG_SMP */ hrtick_update(rq); @@ -6304,7 +6308,6 @@ static inline int find_best_target(struct task_struct *p) * The target CPU can be already at a capacity level higher * than the one required to boost the task. */ - if (new_util > capacity_orig_of(i)) continue; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ba2efc714e43..9767d7ebf29a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1710,6 +1710,9 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); } +extern struct rq *lock_rq_of(struct task_struct *p, struct rq_flags *flags); +extern void unlock_rq_of(struct rq *rq, struct task_struct *p, struct rq_flags *flags); + #ifdef CONFIG_SMP #ifdef CONFIG_PREEMPT diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 55a6bb36f2ca..f04c65b18fca 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -11,6 +11,10 @@ #include "sched.h" #include "tune.h" +#ifdef CONFIG_CGROUP_SCHEDTUNE +static bool schedtune_initialized = false; +#endif + unsigned int sysctl_sched_cfs_boost __read_mostly; extern struct target_nrg schedtune_target_nrg; @@ -222,6 +226,8 @@ struct boost_groups { /* Count of RUNNABLE tasks on that boost group */ unsigned tasks; } group[BOOSTGROUPS_COUNT]; + /* CPU's boost group locking */ + raw_spinlock_t lock; }; /* Boost groups affecting each CPU in the system */ @@ -298,28 +304,24 @@ schedtune_boostgroup_update(int idx, int boost) return 0; } +#define ENQUEUE_TASK 1 +#define DEQUEUE_TASK -1 + static inline void schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count) { - struct boost_groups *bg; - int tasks; - - bg = &per_cpu(cpu_boost_groups, cpu); + struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu); + int tasks = bg->group[idx].tasks + task_count; /* Update boosted tasks count while avoiding to make it negative */ - if (task_count < 0 && bg->group[idx].tasks <= -task_count) - bg->group[idx].tasks = 0; - else - bg->group[idx].tasks += task_count; - - /* Boost group activation or deactivation on that RQ */ - tasks = bg->group[idx].tasks; - if (tasks == 1 || tasks == 0) - schedtune_cpu_update(cpu); + bg->group[idx].tasks = max(0, tasks); trace_sched_tune_tasks_update(p, cpu, tasks, idx, bg->group[idx].boost, bg->boost_max); + /* Boost group activation or deactivation on that RQ */ + if (tasks == 1 || tasks == 0) + schedtune_cpu_update(cpu); } /* @@ -327,9 +329,14 @@ schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count) */ void schedtune_enqueue_task(struct task_struct *p, int cpu) { + struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu); + unsigned long irq_flags; struct schedtune *st; int idx; + if (!unlikely(schedtune_initialized)) + return; + /* * When a task is marked PF_EXITING by do_exit() it's going to be * dequeued and enqueued multiple times in the exit path. @@ -339,13 +346,109 @@ void schedtune_enqueue_task(struct task_struct *p, int cpu) if (p->flags & PF_EXITING) return; - /* Get task boost group */ + /* + * Boost group accouting is protected by a per-cpu lock and requires + * interrupt to be disabled to avoid race conditions for example on + * do_exit()::cgroup_exit() and task migration. + */ + raw_spin_lock_irqsave(&bg->lock, irq_flags); rcu_read_lock(); + st = task_schedtune(p); idx = st->idx; + + schedtune_tasks_update(p, cpu, idx, ENQUEUE_TASK); + rcu_read_unlock(); + raw_spin_unlock_irqrestore(&bg->lock, irq_flags); +} - schedtune_tasks_update(p, cpu, idx, 1); +int schedtune_allow_attach(struct cgroup_taskset *tset) +{ + /* We always allows tasks to be moved between existing CGroups */ + return 0; +} + +int schedtune_can_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *css; + struct boost_groups *bg; + struct rq_flags irq_flags; + unsigned int cpu; + struct rq *rq; + int src_bg; /* Source boost group index */ + int dst_bg; /* Destination boost group index */ + int tasks; + + if (!unlikely(schedtune_initialized)) + return 0; + + + cgroup_taskset_for_each(task, css, tset) { + + /* + * Lock the CPU's RQ the task is enqueued to avoid race + * conditions with migration code while the task is being + * accounted + */ + rq = lock_rq_of(task, &irq_flags); + + if (!task->on_rq) { + unlock_rq_of(rq, task, &irq_flags); + continue; + } + + /* + * Boost group accouting is protected by a per-cpu lock and requires + * interrupt to be disabled to avoid race conditions on... + */ + cpu = cpu_of(rq); + bg = &per_cpu(cpu_boost_groups, cpu); + raw_spin_lock(&bg->lock); + + dst_bg = css_st(css)->idx; + src_bg = task_schedtune(task)->idx; + + /* + * Current task is not changing boostgroup, which can + * happen when the new hierarchy is in use. + */ + if (unlikely(dst_bg == src_bg)) { + raw_spin_unlock(&bg->lock); + unlock_rq_of(rq, task, &irq_flags); + continue; + } + + /* + * This is the case of a RUNNABLE task which is switching its + * current boost group. + */ + + /* Move task from src to dst boost group */ + tasks = bg->group[src_bg].tasks - 1; + bg->group[src_bg].tasks = max(0, tasks); + bg->group[dst_bg].tasks += 1; + + raw_spin_unlock(&bg->lock); + unlock_rq_of(rq, task, &irq_flags); + + /* Update CPU boost group */ + if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1) + schedtune_cpu_update(task_cpu(task)); + + } + + return 0; +} + +void schedtune_cancel_attach(struct cgroup_taskset *tset) +{ + /* This can happen only if SchedTune controller is mounted with + * other hierarchies ane one of them fails. Since usually SchedTune is + * mouted on its own hierarcy, for the time being we do not implement + * a proper rollback mechanism */ + WARN(1, "SchedTune cancel attach not implemented"); } /* @@ -353,26 +456,62 @@ void schedtune_enqueue_task(struct task_struct *p, int cpu) */ void schedtune_dequeue_task(struct task_struct *p, int cpu) { + struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu); + unsigned long irq_flags; struct schedtune *st; int idx; + if (!unlikely(schedtune_initialized)) + return; + /* * When a task is marked PF_EXITING by do_exit() it's going to be * dequeued and enqueued multiple times in the exit path. * Thus we avoid any further update, since we do not want to change * CPU boosting while the task is exiting. - * The last dequeue will be done by cgroup exit() callback. + * The last dequeue is already enforce by the do_exit() code path + * via schedtune_exit_task(). */ if (p->flags & PF_EXITING) return; - /* Get task boost group */ + /* + * Boost group accouting is protected by a per-cpu lock and requires + * interrupt to be disabled to avoid race conditions on... + */ + raw_spin_lock_irqsave(&bg->lock, irq_flags); rcu_read_lock(); + st = task_schedtune(p); idx = st->idx; + + schedtune_tasks_update(p, cpu, idx, DEQUEUE_TASK); + rcu_read_unlock(); + raw_spin_unlock_irqrestore(&bg->lock, irq_flags); +} + +void schedtune_exit_task(struct task_struct *tsk) +{ + struct schedtune *st; + struct rq_flags irq_flags; + unsigned int cpu; + struct rq *rq; + int idx; + + if (!unlikely(schedtune_initialized)) + return; - schedtune_tasks_update(p, cpu, idx, -1); + rq = lock_rq_of(tsk, &irq_flags); + rcu_read_lock(); + + cpu = cpu_of(rq); + st = task_schedtune(tsk); + idx = st->idx; + schedtune_tasks_update(tsk, cpu, idx, DEQUEUE_TASK); + + rcu_read_unlock(); + unlock_rq_of(rq, tsk, &irq_flags); } int schedtune_cpu_boost(int cpu) @@ -518,6 +657,9 @@ schedtune_css_free(struct cgroup_subsys_state *css) struct cgroup_subsys schedtune_cgrp_subsys = { .css_alloc = schedtune_css_alloc, .css_free = schedtune_css_free, +// .allow_attach = schedtune_allow_attach, + .can_attach = schedtune_can_attach, + .cancel_attach = schedtune_cancel_attach, .legacy_cftypes = files, .early_init = 1, }; diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h index 7d2aa7951554..be1785eb1c5b 100644 --- a/kernel/sched/tune.h +++ b/kernel/sched/tune.h @@ -17,11 +17,18 @@ struct target_nrg { int schedtune_cpu_boost(int cpu); int schedtune_task_boost(struct task_struct *tsk); +void schedtune_exit_task(struct task_struct *tsk); + void schedtune_enqueue_task(struct task_struct *p, int cpu); void schedtune_dequeue_task(struct task_struct *p, int cpu); #else /* CONFIG_CGROUP_SCHEDTUNE */ +#define schedtune_cpu_boost(cpu) get_sysctl_sched_cfs_boost() +#define schedtune_task_boost(tsk) get_sysctl_sched_cfs_boost() + +#define schedtune_exit_task(task) do { } while (0) + #define schedtune_enqueue_task(task, cpu) do { } while (0) #define schedtune_dequeue_task(task, cpu) do { } while (0) @@ -33,6 +40,11 @@ int schedtune_accept_deltas(int nrg_delta, int cap_delta, #else /* CONFIG_SCHED_TUNE */ +#define schedtune_cpu_boost(cpu) 0 +#define schedtune_task_boost(tsk) 0 + +#define schedtune_exit_task(task) do { } while (0) + #define schedtune_enqueue_task(task, cpu) do { } while (0) #define schedtune_dequeue_task(task, cpu) do { } while (0) -- GitLab From 8dce8ebd95680d769ea9f23b313f9d4e1945594d Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 29 Jul 2016 14:41:25 +0100 Subject: [PATCH 1184/1442] ANDROID: sched/fair: optimize idle cpu selection for boosted tasks find_best_target CPU selection is biased towards lower CPU IDs. Bias towards higher CPUs for boosted tasks. For boosted tasks unconditionally use the idle CPU returned by find_best_target. BUG: 29512132 Change-Id: I3d650051752163fcf3dc7909751d1fde3f9d17c0 Signed-off-by: Andres Oportus Conflicts: kernel/sched/fair.c --- kernel/sched/fair.c | 70 +++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 225d106a4dab..b932a5074b12 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6269,32 +6269,30 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } -static inline int find_best_target(struct task_struct *p) +static inline int find_best_target(struct task_struct *p, bool boosted) { - int i, boosted; + int iter_cpu; int target_cpu = -1; int target_capacity = 0; int backup_capacity = 0; - int idle_cpu = -1; + int best_idle_cpu = -1; int best_idle_cstate = INT_MAX; int backup_cpu = -1; unsigned long task_util_boosted, new_util; - /* - * Favor 1) busy cpu with most capacity at current OPP - * 2) idle_cpu with capacity at current OPP - * 3) busy cpu with capacity at higher OPP - */ -#ifdef CONFIG_CGROUP_SCHEDTUNE - boosted = schedtune_task_boost(p); -#else - boosted = 0; -#endif task_util_boosted = boosted_task_util(p); - for_each_cpu(i, tsk_cpus_allowed(p)) { - int cur_capacity = capacity_curr_of(i); - struct rq *rq = cpu_rq(i); - int idle_idx = idle_get_state_idx(rq); + for (iter_cpu = 0; iter_cpu < NR_CPUS; iter_cpu++) { + int cur_capacity; + struct rq *rq; + int idle_idx; + + /* + * favor higher cpus for boosted tasks + */ + int i = boosted ? NR_CPUS-iter_cpu-1 : iter_cpu; + + if (!cpu_online(i) || !cpumask_test_cpu(i, tsk_cpus_allowed(p))) + continue; /* * p's blocked utilization is still accounted for on prev_cpu @@ -6315,46 +6313,43 @@ static inline int find_best_target(struct task_struct *p) * For boosted tasks we favor idle cpus unconditionally to * improve latency. */ - if (idle_idx >= 0 && boosted) { - if (idle_cpu < 0 || - (sysctl_sched_cstate_aware && - best_idle_cstate > idle_idx)) { - best_idle_cstate = idle_idx; - idle_cpu = i; - } + if (idle_cpu(i) && boosted) { + if (best_idle_cpu < 0) + best_idle_cpu = i; continue; } + cur_capacity = capacity_curr_of(i); + rq = cpu_rq(i); + idle_idx = idle_get_state_idx(rq); + if (new_util < cur_capacity) { if (cpu_rq(i)->nr_running) { if (target_capacity == 0 || target_capacity > cur_capacity) { - /* busy CPU with most capacity at current OPP */ target_cpu = i; target_capacity = cur_capacity; } } else if (!boosted) { - if (idle_cpu < 0 || + if (best_idle_cpu < 0 || (sysctl_sched_cstate_aware && best_idle_cstate > idle_idx)) { best_idle_cstate = idle_idx; - idle_cpu = i; + best_idle_cpu = i; } } } else if (backup_capacity == 0 || backup_capacity > cur_capacity) { - /* first busy CPU with capacity at higher OPP */ backup_capacity = cur_capacity; backup_cpu = i; } } - if (!boosted && target_cpu < 0) { - target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu; - } + if (boosted && best_idle_cpu >= 0) + target_cpu = best_idle_cpu; + else if (target_cpu < 0) + target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu; - if (boosted && idle_cpu >= 0) - target_cpu = idle_cpu; return target_cpu; } @@ -6440,9 +6435,16 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync) /* * Find a cpu with sufficient capacity */ - int tmp_target = find_best_target(p); +#ifdef CONFIG_CGROUP_SCHEDTUNE + bool boosted = schedtune_task_boost(p) > 0; +#else + bool boosted = 0; +#endif + int tmp_target = find_best_target(p, boosted); if (tmp_target >= 0) target_cpu = tmp_target; + if (boosted && idle_cpu(target_cpu)) + return target_cpu; } if (target_cpu != task_cpu(p)) { -- GitLab From d5563d3a5d03469460bb705c4710736efe41a0f2 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 29 Jul 2016 15:32:26 +0100 Subject: [PATCH 1185/1442] ANDROID: sched/tune: fix PB and PC cuts indexes definition The current definition of the Performance Boost (PB) and Performance Constraint (PC) regions is has two main issues: 1) in the computation of the boost index we overflow the thresholds_gains table for boost=100 2) the two cuts had _NOT_ the same ratio The last point means that when boost=0 we do _not_ have a "standard" EAS behaviour, i.e. accepting all candidate which decrease energy regardless of their impact on performances. Instead, we accept only schedule candidate which are in the Optimal region, i.e. decrease energy while increasing performances. This behaviour can have a negative impact also on CPU selection policies which tries to spread tasks to reduce latencies. Indeed, for example we could end up rejecting a schedule candidate which want to move a task from a congested CPU to an idle one while, specifically in the case where the target CPU will be running on a lower OPP. This patch fixes these two issues by properly clamping the boost value in the appropriate range to compute the threshold indexes as well as by using the same threshold index for both cuts. Signed-off-by: Patrick Bellasi Signed-off-by: Srinath Sridharan sched/tune: fix update of threshold index for boost groups When SchedTune is configured to work with CGroup mode, each time we update the boost value of a group we do not update the threshed indexes for the definition of the Performance Boost (PC) and Performance Constraint (PC) region. This means that while the OPP boosting and CPU biasing selection is working as expected, the __schedtune_accept_deltas function is always using the initial values for these cuts. This patch ensure that each time a new boost value is configured for a boost group, the cuts for the PB and PC region are properly updated too. Signed-off-by: Patrick Bellasi Signed-off-by: Srinath Sridharan sched/tune: update PC and PB cuts definition The current definition of Performance Boost (PB) and Performance Constraint (PC) cuts defines two "dead regions": - up to 20% boost: we are in energy-reduction only mode, i.e. accept all candidate which reduce energy - over 70% boost: we are in performance-increase only mode, i.e. accept only sched candidate which do not reduce performances This patch uses a more fine grained configuration where these two "dead regions" are reduced to: up to 10% and over 90%. This should allow to have some boosting benefits starting from 10% boost values as well as not being to much permissive starting from boost values of 80%. Suggested-by: Leo Yan Signed-off-by: Patrick Bellasi Signed-off-by: Srinath Sridharan bug: 28312446 Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 58 ++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index f04c65b18fca..dd92a583057f 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -38,16 +38,16 @@ struct threshold_params { */ static struct threshold_params threshold_gains[] = { - { 0, 4 }, /* >= 0% */ - { 0, 4 }, /* >= 10% */ - { 1, 4 }, /* >= 20% */ - { 2, 4 }, /* >= 30% */ - { 3, 4 }, /* >= 40% */ - { 4, 3 }, /* >= 50% */ - { 4, 2 }, /* >= 60% */ - { 4, 1 }, /* >= 70% */ - { 4, 0 }, /* >= 80% */ - { 4, 0 } /* >= 90% */ + { 0, 5 }, /* < 10% */ + { 1, 5 }, /* < 20% */ + { 2, 5 }, /* < 30% */ + { 3, 5 }, /* < 40% */ + { 4, 5 }, /* < 50% */ + { 5, 4 }, /* < 60% */ + { 5, 3 }, /* < 70% */ + { 5, 2 }, /* < 80% */ + { 5, 1 }, /* < 90% */ + { 5, 0 } /* <= 100% */ }; static int @@ -549,13 +549,29 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft, s64 boost) { struct schedtune *st = css_st(css); + unsigned threshold_idx; + int boost_pct; if (boost < -100 || boost > 100) return -EINVAL; + boost_pct = boost; + + /* + * Update threshold params for Performance Boost (B) + * and Performance Constraint (C) regions. + * The current implementatio uses the same cuts for both + * B and C regions. + */ + threshold_idx = clamp(boost_pct, 0, 99) / 10; + st->perf_boost_idx = threshold_idx; + st->perf_constrain_idx = threshold_idx; st->boost = boost; - if (css == &root_schedtune.css) + if (css == &root_schedtune.css) { sysctl_sched_cfs_boost = boost; + perf_boost_idx = threshold_idx; + perf_constrain_idx = threshold_idx; + } /* Update CPU boost */ schedtune_boostgroup_update(st->idx, st->boost); @@ -710,17 +726,25 @@ sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write, loff_t *ppos) { int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + unsigned threshold_idx; + int boost_pct; if (ret || !write) return ret; - /* Performance Boost (B) region threshold params */ - perf_boost_idx = sysctl_sched_cfs_boost; - perf_boost_idx /= 10; + if (sysctl_sched_cfs_boost < -100 || sysctl_sched_cfs_boost > 100) + return -EINVAL; + boost_pct = sysctl_sched_cfs_boost; - /* Performance Constraint (C) region threshold params */ - perf_constrain_idx = 100 - sysctl_sched_cfs_boost; - perf_constrain_idx /= 10; + /* + * Update threshold params for Performance Boost (B) + * and Performance Constraint (C) regions. + * The current implementatio uses the same cuts for both + * B and C regions. + */ + threshold_idx = clamp(boost_pct, 0, 99) / 10; + perf_boost_idx = threshold_idx; + perf_constrain_idx = threshold_idx; return 0; } -- GitLab From 26c2154816c7653cf0485b89c8fbb4187f479eef Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Tue, 31 May 2016 09:08:38 -0700 Subject: [PATCH 1186/1442] ANDROID: sched: Introduce Window Assisted Load Tracking (WALT) use a window based view of time in order to track task demand and CPU utilization in the scheduler. Window Assisted Load Tracking (WALT) implementation credits: Srivatsa Vaddagiri, Steve Muckle, Syed Rameez Mustafa, Joonwoo Park, Pavan Kumar Kondeti, Olav Haugan 2016-03-06: Integration with EAS/refactoring by Vikram Mulukutla and Todd Kjos Change-Id: I21408236836625d4e7d7de1843d20ed5ff36c708 Includes fixes for issues: eas/walt: Use walt_ktime_clock() instead of ktime_get_ns() to avoid a race resulting in watchdog resets BUG: 29353986 Change-Id: Ic1820e22a136f7c7ebd6f42e15f14d470f6bbbdb Handle walt accounting anomoly during resume During resume, there is a corner case where on wakeup, a task's prev_runnable_sum can go negative. This is a workaround that fixes the condition and warns (instead of crashing). BUG: 29464099 Change-Id: I173e7874324b31a3584435530281708145773508 Signed-off-by: Todd Kjos Signed-off-by: Srinath Sridharan Signed-off-by: Juri Lelli [jstultz: fwdported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- include/linux/sched.h | 53 ++ include/linux/sched/sysctl.h | 5 + include/trace/events/sched.h | 149 +++++ init/Kconfig | 9 + kernel/sched/Makefile | 1 + kernel/sched/core.c | 41 +- kernel/sched/fair.c | 18 + kernel/sched/rt.c | 4 + kernel/sched/sched.h | 34 ++ kernel/sched/stop_task.c | 3 + kernel/sched/walt.c | 1098 ++++++++++++++++++++++++++++++++++ kernel/sched/walt.h | 57 ++ kernel/sysctl.c | 23 + 13 files changed, 1494 insertions(+), 1 deletion(-) create mode 100644 kernel/sched/walt.c create mode 100644 kernel/sched/walt.h diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d779262b0c4..f90847037de7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -318,6 +318,15 @@ extern char ___assert_task_state[1 - 2*!!( /* Task command name length */ #define TASK_COMM_LEN 16 +enum task_event { + PUT_PREV_TASK = 0, + PICK_NEXT_TASK = 1, + TASK_WAKE = 2, + TASK_MIGRATE = 3, + TASK_UPDATE = 4, + IRQ_UPDATE = 5, +}; + #include /* @@ -1368,6 +1377,41 @@ struct sched_statistics { }; #endif +#ifdef CONFIG_SCHED_WALT +#define RAVG_HIST_SIZE_MAX 5 + +/* ravg represents frequency scaled cpu-demand of tasks */ +struct ravg { + /* + * 'mark_start' marks the beginning of an event (task waking up, task + * starting to execute, task being preempted) within a window + * + * 'sum' represents how runnable a task has been within current + * window. It incorporates both running time and wait time and is + * frequency scaled. + * + * 'sum_history' keeps track of history of 'sum' seen over previous + * RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are + * ignored. + * + * 'demand' represents maximum sum seen over previous + * sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency + * demand for tasks. + * + * 'curr_window' represents task's contribution to cpu busy time + * statistics (rq->curr_runnable_sum) in current window + * + * 'prev_window' represents task's contribution to cpu busy time + * statistics (rq->prev_runnable_sum) in previous window + */ + u64 mark_start; + u32 sum, demand; + u32 sum_history[RAVG_HIST_SIZE_MAX]; + u32 curr_window, prev_window; + u16 active_windows; +}; +#endif + struct sched_entity { struct load_weight load; /* for load-balancing */ struct rb_node run_node; @@ -1538,6 +1582,15 @@ struct task_struct { const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; +#ifdef CONFIG_SCHED_WALT + struct ravg ravg; + /* + * 'init_load_pct' represents the initial task load assigned to children + * of this task + */ + u32 init_load_pct; +#endif + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 02757437db29..9e82e3ada1fc 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -22,6 +22,11 @@ extern unsigned int sysctl_sched_is_big_little; extern unsigned int sysctl_sched_sync_hint_enable; extern unsigned int sysctl_sched_initial_task_util; extern unsigned int sysctl_sched_cstate_aware; +#ifdef CONFIG_SCHED_WALT +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int sysctl_sched_use_walt_task_util; +extern unsigned int sysctl_sched_walt_init_task_load_pct; +#endif enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 28215ae31eab..230dc4368655 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -912,6 +912,155 @@ TRACE_EVENT(sched_tune_filter, __entry->payoff, __entry->region) ); +#ifdef CONFIG_SCHED_WALT +struct rq; + +TRACE_EVENT(walt_update_task_ravg, + + TP_PROTO(struct task_struct *p, struct rq *rq, int evt, + u64 wallclock, u64 irqtime), + + TP_ARGS(p, rq, evt, wallclock, irqtime), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( pid_t, cur_pid ) + __field(unsigned int, cur_freq ) + __field( u64, wallclock ) + __field( u64, mark_start ) + __field( u64, delta_m ) + __field( u64, win_start ) + __field( u64, delta ) + __field( u64, irqtime ) + __field( int, evt ) + __field(unsigned int, demand ) + __field(unsigned int, sum ) + __field( int, cpu ) + __field( u64, cs ) + __field( u64, ps ) + __field( u32, curr_window ) + __field( u32, prev_window ) + __field( u64, nt_cs ) + __field( u64, nt_ps ) + __field( u32, active_windows ) + ), + + TP_fast_assign( + __entry->wallclock = wallclock; + __entry->win_start = rq->window_start; + __entry->delta = (wallclock - rq->window_start); + __entry->evt = evt; + __entry->cpu = rq->cpu; + __entry->cur_pid = rq->curr->pid; + __entry->cur_freq = rq->cur_freq; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->mark_start = p->ravg.mark_start; + __entry->delta_m = (wallclock - p->ravg.mark_start); + __entry->demand = p->ravg.demand; + __entry->sum = p->ravg.sum; + __entry->irqtime = irqtime; + __entry->cs = rq->curr_runnable_sum; + __entry->ps = rq->prev_runnable_sum; + __entry->curr_window = p->ravg.curr_window; + __entry->prev_window = p->ravg.prev_window; + __entry->nt_cs = rq->nt_curr_runnable_sum; + __entry->nt_ps = rq->nt_prev_runnable_sum; + __entry->active_windows = p->ravg.active_windows; + ), + + TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu" + " cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u" + , __entry->wallclock, __entry->win_start, __entry->delta, + __entry->evt, __entry->cpu, + __entry->cur_freq, __entry->cur_pid, + __entry->pid, __entry->comm, __entry->mark_start, + __entry->delta_m, __entry->demand, + __entry->sum, __entry->irqtime, + __entry->cs, __entry->ps, + __entry->curr_window, __entry->prev_window, + __entry->nt_cs, __entry->nt_ps, + __entry->active_windows + ) +); + +TRACE_EVENT(walt_update_history, + + TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples, + int evt), + + TP_ARGS(rq, p, runtime, samples, evt), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field(unsigned int, runtime ) + __field( int, samples ) + __field( int, evt ) + __field( u64, demand ) + __field(unsigned int, walt_avg ) + __field(unsigned int, pelt_avg ) + __array( u32, hist, RAVG_HIST_SIZE_MAX) + __field( int, cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->runtime = runtime; + __entry->samples = samples; + __entry->evt = evt; + __entry->demand = p->ravg.demand; + __entry->walt_avg = (__entry->demand << 10) / walt_ravg_window, + __entry->pelt_avg = p->se.avg.util_avg; + memcpy(__entry->hist, p->ravg.sum_history, + RAVG_HIST_SIZE_MAX * sizeof(u32)); + __entry->cpu = rq->cpu; + ), + + TP_printk("%d (%s): runtime %u samples %d event %d demand %llu" + " walt %u pelt %u (hist: %u %u %u %u %u) cpu %d", + __entry->pid, __entry->comm, + __entry->runtime, __entry->samples, __entry->evt, + __entry->demand, + __entry->walt_avg, + __entry->pelt_avg, + __entry->hist[0], __entry->hist[1], + __entry->hist[2], __entry->hist[3], + __entry->hist[4], __entry->cpu) +); + +TRACE_EVENT(walt_migration_update_sum, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __field(int, cpu ) + __field(int, pid ) + __field( u64, cs ) + __field( u64, ps ) + __field( s64, nt_cs ) + __field( s64, nt_ps ) + ), + + TP_fast_assign( + __entry->cpu = cpu_of(rq); + __entry->cs = rq->curr_runnable_sum; + __entry->ps = rq->prev_runnable_sum; + __entry->nt_cs = (s64)rq->nt_curr_runnable_sum; + __entry->nt_ps = (s64)rq->nt_prev_runnable_sum; + __entry->pid = p->pid; + ), + + TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d", + __entry->cpu, __entry->cs, __entry->ps, + __entry->nt_cs, __entry->nt_ps, __entry->pid) +); +#endif /* CONFIG_SCHED_WALT */ + #endif /* CONFIG_SMP */ #endif /* _TRACE_SCHED_H */ diff --git a/init/Kconfig b/init/Kconfig index b0bfddeb39a4..d7dd52f8ec23 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -400,6 +400,15 @@ config IRQ_TIME_ACCOUNTING If in doubt, say N here. +config SCHED_WALT + bool "Support window based load tracking" + depends on SMP + help + This feature will allow the scheduler to maintain a tunable window + based set of metrics for tasks and runqueues. These metrics can be + used to guide task placement as well as task frequency requirements + for cpufreq governors. + config BSD_PROCESS_ACCT bool "BSD Process Accounting" depends on MULTIUSER diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 87be48374e01..62793dbd2e12 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -19,6 +19,7 @@ obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o obj-y += wait.o swait.o completion.o idle.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o +obj-$(CONFIG_SCHED_WALT) += walt.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b6126506dad2..1516c867876d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -90,6 +90,7 @@ #define CREATE_TRACE_POINTS #include +#include "walt.h" DEFINE_MUTEX(sched_domains_mutex); DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); @@ -1265,6 +1266,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) p->sched_class->migrate_task_rq(p); p->se.nr_migrations++; perf_event_task_migrate(p); + + walt_fixup_busy_time(p, new_cpu); } __set_task_cpu(p, new_cpu); @@ -2021,6 +2024,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) { unsigned long flags; int cpu, success = 0; +#ifdef CONFIG_SMP + struct rq *rq; + u64 wallclock; +#endif /* * If we are going to wake up a thread waiting for CONDITION we @@ -2094,14 +2101,24 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ smp_cond_load_acquire(&p->on_cpu, !VAL); + rq = cpu_rq(task_cpu(p)); + + raw_spin_lock(&rq->lock); + wallclock = walt_ktime_clock(); + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0); + raw_spin_unlock(&rq->lock); + p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); + if (task_cpu(p) != cpu) { wake_flags |= WF_MIGRATED; set_task_cpu(p, cpu); } + #endif /* CONFIG_SMP */ ttwu_queue(p, cpu, wake_flags); @@ -2151,8 +2168,13 @@ static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie trace_sched_waking(p); - if (!task_on_rq_queued(p)) + if (!task_on_rq_queued(p)) { + u64 wallclock = walt_ktime_clock(); + + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0); ttwu_activate(rq, p, ENQUEUE_WAKEUP); + } ttwu_do_wakeup(rq, p, 0, cookie); ttwu_stat(p, smp_processor_id(), 0); @@ -2217,6 +2239,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.nr_migrations = 0; p->se.vruntime = 0; INIT_LIST_HEAD(&p->se.group_node); + walt_init_new_task_load(p); #ifdef CONFIG_FAIR_GROUP_SCHED p->se.cfs_rq = NULL; @@ -2576,6 +2599,9 @@ void wake_up_new_task(struct task_struct *p) struct rq *rq; raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + + walt_init_new_task_load(p); + p->state = TASK_RUNNING; #ifdef CONFIG_SMP /* @@ -2591,6 +2617,8 @@ void wake_up_new_task(struct task_struct *p) rq = __task_rq_lock(p, &rf); post_init_entity_util_avg(&p->se); + walt_mark_task_starting(p); + activate_task(rq, p, ENQUEUE_WAKEUP_NEW); p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); @@ -3162,9 +3190,12 @@ void scheduler_tick(void) sched_clock_tick(); raw_spin_lock(&rq->lock); + walt_set_window_start(rq); update_rq_clock(rq); curr->sched_class->task_tick(rq, curr, 0); cpu_load_update_active(rq); + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, + walt_ktime_clock(), 0); calc_global_load_tick(rq); raw_spin_unlock(&rq->lock); @@ -3420,6 +3451,7 @@ static void __sched notrace __schedule(bool preempt) struct pin_cookie cookie; struct rq *rq; int cpu; + u64 wallclock; cpu = smp_processor_id(); rq = cpu_rq(cpu); @@ -3472,6 +3504,9 @@ static void __sched notrace __schedule(bool preempt) update_rq_clock(rq); next = pick_next_task(rq, prev, cookie); + wallclock = walt_ktime_clock(); + walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0); + walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0); clear_tsk_need_resched(prev); clear_preempt_need_resched(); rq->clock_skip_update = 0; @@ -7564,6 +7599,9 @@ int sched_cpu_dying(unsigned int cpu) /* Handle pending wakeups and then migrate everything off */ sched_ttwu_pending(); raw_spin_lock_irqsave(&rq->lock, flags); + + walt_migrate_sync_cpu(cpu); + if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); @@ -7599,6 +7637,7 @@ void __init sched_init_smp(void) { cpumask_var_t non_isolated_cpus; + walt_init_cpu_efficiency(); alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); alloc_cpumask_var(&fallback_doms, GFP_KERNEL); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b932a5074b12..44ab02aad5ea 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -30,11 +30,13 @@ #include #include #include +#include #include #include "sched.h" #include "tune.h" +#include "walt.h" /* * Targeted preemption latency for CPU-bound tasks: @@ -56,6 +58,10 @@ unsigned int sysctl_sched_sync_hint_enable = 1; unsigned int sysctl_sched_initial_task_util = 0; unsigned int sysctl_sched_cstate_aware = 1; +#ifdef CONFIG_SCHED_WALT +unsigned int sysctl_sched_use_walt_cpu_util = 1; +unsigned int sysctl_sched_use_walt_task_util = 1; +#endif /* * The initial- and re-scaling of tunables is configurable * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) @@ -4604,6 +4610,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq_throttled(cfs_rq)) break; cfs_rq->h_nr_running++; + walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p); flags = ENQUEUE_WAKEUP; } @@ -4611,6 +4618,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); cfs_rq->h_nr_running++; + walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p); if (cfs_rq_throttled(cfs_rq)) break; @@ -4625,6 +4633,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) #ifdef CONFIG_SMP if (!se) { + walt_inc_cumulative_runnable_avg(rq, p); if (!task_new && !rq->rd->overutilized && cpu_overutilized(rq->cpu)) rq->rd->overutilized = true; @@ -4674,6 +4683,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq_throttled(cfs_rq)) break; cfs_rq->h_nr_running--; + walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p); /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) { @@ -4693,6 +4703,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); cfs_rq->h_nr_running--; + walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p); if (cfs_rq_throttled(cfs_rq)) break; @@ -4707,6 +4718,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) #ifdef CONFIG_SMP if (!se) { + walt_dec_cumulative_runnable_avg(rq, p); /* * We want to potentially trigger a freq switch @@ -5673,6 +5685,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, static inline int task_util(struct task_struct *p) { +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_task_util) { + unsigned long demand = p->ravg.demand; + return (demand << 10) / walt_ravg_window; + } +#endif return p->se.avg.util_avg; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 2516b8df6dbb..cc031eb193a6 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -8,6 +8,8 @@ #include #include +#include "walt.h" + int sched_rr_timeslice = RR_TIMESLICE; static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); @@ -1322,6 +1324,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) rt_se->timeout = 0; enqueue_rt_entity(rt_se, flags); + walt_inc_cumulative_runnable_avg(rq, p); if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) enqueue_pushable_task(rq, p); @@ -1333,6 +1336,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) update_curr_rt(rq); dequeue_rt_entity(rt_se, flags); + walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9767d7ebf29a..8c7bd8e60970 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -438,6 +438,10 @@ struct cfs_rq { struct list_head leaf_cfs_rq_list; struct task_group *tg; /* group that "owns" this runqueue */ +#ifdef CONFIG_SCHED_WALT + u64 cumulative_runnable_avg; +#endif + #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; u64 runtime_expires; @@ -691,6 +695,27 @@ struct rq { u64 max_idle_balance_cost; #endif +#ifdef CONFIG_SCHED_WALT + /* + * max_freq = user or thermal defined maximum + * max_possible_freq = maximum supported by hardware + */ + unsigned int cur_freq, max_freq, min_freq, max_possible_freq; + struct cpumask freq_domain_cpumask; + + u64 cumulative_runnable_avg; + int efficiency; /* Differentiate cpus with different IPC capability */ + int load_scale_factor; + int capacity; + int max_possible_capacity; + u64 window_start; + u64 curr_runnable_sum; + u64 prev_runnable_sum; + u64 nt_curr_runnable_sum; + u64 nt_prev_runnable_sum; +#endif /* CONFIG_SCHED_WALT */ + + #ifdef CONFIG_IRQ_TIME_ACCOUNTING u64 prev_irq_time; #endif @@ -1576,6 +1601,10 @@ static inline unsigned long capacity_orig_of(int cpu) return cpu_rq(cpu)->cpu_capacity_orig; } +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int walt_ravg_window; +extern unsigned int walt_disabled; + /* * cpu_util returns the amount of capacity of a CPU that is used by CFS * tasks. The unit of the return value must be the one of capacity so we can @@ -1607,6 +1636,11 @@ static inline unsigned long __cpu_util(int cpu, int delta) unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; unsigned long capacity = capacity_orig_of(cpu); +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) + util = (cpu_rq(cpu)->prev_runnable_sum << SCHED_CAPACITY_SHIFT) / + walt_ravg_window; +#endif delta += util; if (delta < 0) return 0; diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 604297a08b3a..836a3894cf57 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -1,4 +1,5 @@ #include "sched.h" +#include "walt.h" /* * stop-task scheduling class. @@ -42,12 +43,14 @@ static void enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) { add_nr_running(rq, 1); + walt_inc_cumulative_runnable_avg(rq, p); } static void dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) { sub_nr_running(rq, 1); + walt_dec_cumulative_runnable_avg(rq, p); } static void yield_task_stop(struct rq *rq) diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c new file mode 100644 index 000000000000..baab741a2a33 --- /dev/null +++ b/kernel/sched/walt.c @@ -0,0 +1,1098 @@ +/* + * Copyright (c) 2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * Window Assisted Load Tracking (WALT) implementation credits: + * Srivatsa Vaddagiri, Steve Muckle, Syed Rameez Mustafa, Joonwoo Park, + * Pavan Kumar Kondeti, Olav Haugan + * + * 2016-03-06: Integration with EAS/refactoring by Vikram Mulukutla + * and Todd Kjos + */ + +#include +#include +#include +#include "sched.h" +#include "walt.h" + +#define WINDOW_STATS_RECENT 0 +#define WINDOW_STATS_MAX 1 +#define WINDOW_STATS_MAX_RECENT_AVG 2 +#define WINDOW_STATS_AVG 3 +#define WINDOW_STATS_INVALID_POLICY 4 + +#define EXITING_TASK_MARKER 0xdeaddead + +static __read_mostly unsigned int walt_ravg_hist_size = 5; +static __read_mostly unsigned int walt_window_stats_policy = + WINDOW_STATS_MAX_RECENT_AVG; +static __read_mostly unsigned int walt_account_wait_time = 1; +static __read_mostly unsigned int walt_freq_account_wait_time = 0; +static __read_mostly unsigned int walt_io_is_busy = 0; + +unsigned int sysctl_sched_walt_init_task_load_pct = 15; + +/* 1 -> use PELT based load stats, 0 -> use window-based load stats */ +unsigned int __read_mostly walt_disabled = 0; + +static unsigned int max_possible_efficiency = 1024; +static unsigned int min_possible_efficiency = 1024; + +/* + * Maximum possible frequency across all cpus. Task demand and cpu + * capacity (cpu_power) metrics are scaled in reference to it. + */ +static unsigned int max_possible_freq = 1; + +/* + * Minimum possible max_freq across all cpus. This will be same as + * max_possible_freq on homogeneous systems and could be different from + * max_possible_freq on heterogenous systems. min_max_freq is used to derive + * capacity (cpu_power) of cpus. + */ +static unsigned int min_max_freq = 1; + +static unsigned int max_capacity = 1024; +static unsigned int min_capacity = 1024; +static unsigned int max_load_scale_factor = 1024; +static unsigned int max_possible_capacity = 1024; + +/* Mask of all CPUs that have max_possible_capacity */ +static cpumask_t mpc_mask = CPU_MASK_ALL; + +/* Window size (in ns) */ +__read_mostly unsigned int walt_ravg_window = 20000000; + +/* Min window size (in ns) = 10ms */ +#define MIN_SCHED_RAVG_WINDOW 10000000 + +/* Max window size (in ns) = 1s */ +#define MAX_SCHED_RAVG_WINDOW 1000000000 + +static unsigned int sync_cpu; +static ktime_t ktime_last; +static bool walt_ktime_suspended; + +static unsigned int task_load(struct task_struct *p) +{ + return p->ravg.demand; +} + +void +walt_inc_cumulative_runnable_avg(struct rq *rq, + struct task_struct *p) +{ + rq->cumulative_runnable_avg += p->ravg.demand; +} + +void +walt_dec_cumulative_runnable_avg(struct rq *rq, + struct task_struct *p) +{ + rq->cumulative_runnable_avg -= p->ravg.demand; + BUG_ON((s64)rq->cumulative_runnable_avg < 0); +} + +static void +fixup_cumulative_runnable_avg(struct rq *rq, + struct task_struct *p, s64 task_load_delta) +{ + rq->cumulative_runnable_avg += task_load_delta; + if ((s64)rq->cumulative_runnable_avg < 0) + panic("cra less than zero: tld: %lld, task_load(p) = %u\n", + task_load_delta, task_load(p)); +} + +u64 walt_ktime_clock(void) +{ + if (unlikely(walt_ktime_suspended)) + return ktime_to_ns(ktime_last); + return ktime_get_ns(); +} + +static void walt_resume(void) +{ + walt_ktime_suspended = false; +} + +static int walt_suspend(void) +{ + ktime_last = ktime_get(); + walt_ktime_suspended = true; + return 0; +} + +static struct syscore_ops walt_syscore_ops = { + .resume = walt_resume, + .suspend = walt_suspend +}; + +static int __init walt_init_ops(void) +{ + register_syscore_ops(&walt_syscore_ops); + return 0; +} +late_initcall(walt_init_ops); + +void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *cfs_rq, + struct task_struct *p) +{ + cfs_rq->cumulative_runnable_avg += p->ravg.demand; +} + +void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *cfs_rq, + struct task_struct *p) +{ + cfs_rq->cumulative_runnable_avg -= p->ravg.demand; +} + +static int exiting_task(struct task_struct *p) +{ + if (p->flags & PF_EXITING) { + if (p->ravg.sum_history[0] != EXITING_TASK_MARKER) { + p->ravg.sum_history[0] = EXITING_TASK_MARKER; + } + return 1; + } + return 0; +} + +static int __init set_walt_ravg_window(char *str) +{ + get_option(&str, &walt_ravg_window); + + walt_disabled = (walt_ravg_window < MIN_SCHED_RAVG_WINDOW || + walt_ravg_window > MAX_SCHED_RAVG_WINDOW); + return 0; +} + +early_param("walt_ravg_window", set_walt_ravg_window); + +static void +update_window_start(struct rq *rq, u64 wallclock) +{ + s64 delta; + int nr_windows; + + delta = wallclock - rq->window_start; + BUG_ON(delta < 0); + if (delta < walt_ravg_window) + return; + + nr_windows = div64_u64(delta, walt_ravg_window); + rq->window_start += (u64)nr_windows * (u64)walt_ravg_window; +} + +static u64 scale_exec_time(u64 delta, struct rq *rq) +{ + unsigned int cur_freq = rq->cur_freq; + int sf; + + if (unlikely(cur_freq > max_possible_freq)) + cur_freq = rq->max_possible_freq; + + /* round up div64 */ + delta = div64_u64(delta * cur_freq + max_possible_freq - 1, + max_possible_freq); + + sf = DIV_ROUND_UP(rq->efficiency * 1024, max_possible_efficiency); + + delta *= sf; + delta >>= 10; + + return delta; +} + +static int cpu_is_waiting_on_io(struct rq *rq) +{ + if (!walt_io_is_busy) + return 0; + + return atomic_read(&rq->nr_iowait); +} + +static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p, + u64 irqtime, int event) +{ + if (is_idle_task(p)) { + /* TASK_WAKE && TASK_MIGRATE is not possible on idle task! */ + if (event == PICK_NEXT_TASK) + return 0; + + /* PUT_PREV_TASK, TASK_UPDATE && IRQ_UPDATE are left */ + return irqtime || cpu_is_waiting_on_io(rq); + } + + if (event == TASK_WAKE) + return 0; + + if (event == PUT_PREV_TASK || event == IRQ_UPDATE || + event == TASK_UPDATE) + return 1; + + /* Only TASK_MIGRATE && PICK_NEXT_TASK left */ + return walt_freq_account_wait_time; +} + +/* + * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum) + */ +static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, + int event, u64 wallclock, u64 irqtime) +{ + int new_window, nr_full_windows = 0; + int p_is_curr_task = (p == rq->curr); + u64 mark_start = p->ravg.mark_start; + u64 window_start = rq->window_start; + u32 window_size = walt_ravg_window; + u64 delta; + + new_window = mark_start < window_start; + if (new_window) { + nr_full_windows = div64_u64((window_start - mark_start), + window_size); + if (p->ravg.active_windows < USHRT_MAX) + p->ravg.active_windows++; + } + + /* Handle per-task window rollover. We don't care about the idle + * task or exiting tasks. */ + if (new_window && !is_idle_task(p) && !exiting_task(p)) { + u32 curr_window = 0; + + if (!nr_full_windows) + curr_window = p->ravg.curr_window; + + p->ravg.prev_window = curr_window; + p->ravg.curr_window = 0; + } + + if (!account_busy_for_cpu_time(rq, p, irqtime, event)) { + /* account_busy_for_cpu_time() = 0, so no update to the + * task's current window needs to be made. This could be + * for example + * + * - a wakeup event on a task within the current + * window (!new_window below, no action required), + * - switching to a new task from idle (PICK_NEXT_TASK) + * in a new window where irqtime is 0 and we aren't + * waiting on IO */ + + if (!new_window) + return; + + /* A new window has started. The RQ demand must be rolled + * over if p is the current task. */ + if (p_is_curr_task) { + u64 prev_sum = 0; + + /* p is either idle task or an exiting task */ + if (!nr_full_windows) { + prev_sum = rq->curr_runnable_sum; + } + + rq->prev_runnable_sum = prev_sum; + rq->curr_runnable_sum = 0; + } + + return; + } + + if (!new_window) { + /* account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. No rollover + * since we didn't start a new window. An example of this is + * when a task starts execution and then sleeps within the + * same window. */ + + if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) + delta = wallclock - mark_start; + else + delta = irqtime; + delta = scale_exec_time(delta, rq); + rq->curr_runnable_sum += delta; + if (!is_idle_task(p) && !exiting_task(p)) + p->ravg.curr_window += delta; + + return; + } + + if (!p_is_curr_task) { + /* account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has also started, but p is not the current task, so the + * window is not rolled over - just split up and account + * as necessary into curr and prev. The window is only + * rolled over when a new window is processed for the current + * task. + * + * Irqtime can't be accounted by a task that isn't the + * currently running task. */ + + if (!nr_full_windows) { + /* A full window hasn't elapsed, account partial + * contribution to previous completed window. */ + delta = scale_exec_time(window_start - mark_start, rq); + if (!exiting_task(p)) + p->ravg.prev_window += delta; + } else { + /* Since at least one full window has elapsed, + * the contribution to the previous window is the + * full window (window_size). */ + delta = scale_exec_time(window_size, rq); + if (!exiting_task(p)) + p->ravg.prev_window = delta; + } + rq->prev_runnable_sum += delta; + + /* Account piece of busy time in the current window. */ + delta = scale_exec_time(wallclock - window_start, rq); + rq->curr_runnable_sum += delta; + if (!exiting_task(p)) + p->ravg.curr_window = delta; + + return; + } + + if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) { + /* account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has started and p is the current task so rollover is + * needed. If any of these three above conditions are true + * then this busy time can't be accounted as irqtime. + * + * Busy time for the idle task or exiting tasks need not + * be accounted. + * + * An example of this would be a task that starts execution + * and then sleeps once a new window has begun. */ + + if (!nr_full_windows) { + /* A full window hasn't elapsed, account partial + * contribution to previous completed window. */ + delta = scale_exec_time(window_start - mark_start, rq); + if (!is_idle_task(p) && !exiting_task(p)) + p->ravg.prev_window += delta; + + delta += rq->curr_runnable_sum; + } else { + /* Since at least one full window has elapsed, + * the contribution to the previous window is the + * full window (window_size). */ + delta = scale_exec_time(window_size, rq); + if (!is_idle_task(p) && !exiting_task(p)) + p->ravg.prev_window = delta; + + } + /* + * Rollover for normal runnable sum is done here by overwriting + * the values in prev_runnable_sum and curr_runnable_sum. + * Rollover for new task runnable sum has completed by previous + * if-else statement. + */ + rq->prev_runnable_sum = delta; + + /* Account piece of busy time in the current window. */ + delta = scale_exec_time(wallclock - window_start, rq); + rq->curr_runnable_sum = delta; + if (!is_idle_task(p) && !exiting_task(p)) + p->ravg.curr_window = delta; + + return; + } + + if (irqtime) { + /* account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has started and p is the current task so rollover is + * needed. The current task must be the idle task because + * irqtime is not accounted for any other task. + * + * Irqtime will be accounted each time we process IRQ activity + * after a period of idleness, so we know the IRQ busy time + * started at wallclock - irqtime. */ + + BUG_ON(!is_idle_task(p)); + mark_start = wallclock - irqtime; + + /* Roll window over. If IRQ busy time was just in the current + * window then that is all that need be accounted. */ + rq->prev_runnable_sum = rq->curr_runnable_sum; + if (mark_start > window_start) { + rq->curr_runnable_sum = scale_exec_time(irqtime, rq); + return; + } + + /* The IRQ busy time spanned multiple windows. Process the + * busy time preceding the current window start first. */ + delta = window_start - mark_start; + if (delta > window_size) + delta = window_size; + delta = scale_exec_time(delta, rq); + rq->prev_runnable_sum += delta; + + /* Process the remaining IRQ busy time in the current window. */ + delta = wallclock - window_start; + rq->curr_runnable_sum = scale_exec_time(delta, rq); + + return; + } + + BUG(); +} + +static int account_busy_for_task_demand(struct task_struct *p, int event) +{ + /* No need to bother updating task demand for exiting tasks + * or the idle task. */ + if (exiting_task(p) || is_idle_task(p)) + return 0; + + /* When a task is waking up it is completing a segment of non-busy + * time. Likewise, if wait time is not treated as busy time, then + * when a task begins to run or is migrated, it is not running and + * is completing a segment of non-busy time. */ + if (event == TASK_WAKE || (!walt_account_wait_time && + (event == PICK_NEXT_TASK || event == TASK_MIGRATE))) + return 0; + + return 1; +} + +/* + * Called when new window is starting for a task, to record cpu usage over + * recently concluded window(s). Normally 'samples' should be 1. It can be > 1 + * when, say, a real-time task runs without preemption for several windows at a + * stretch. + */ +static void update_history(struct rq *rq, struct task_struct *p, + u32 runtime, int samples, int event) +{ + u32 *hist = &p->ravg.sum_history[0]; + int ridx, widx; + u32 max = 0, avg, demand; + u64 sum = 0; + + /* Ignore windows where task had no activity */ + if (!runtime || is_idle_task(p) || exiting_task(p) || !samples) + goto done; + + /* Push new 'runtime' value onto stack */ + widx = walt_ravg_hist_size - 1; + ridx = widx - samples; + for (; ridx >= 0; --widx, --ridx) { + hist[widx] = hist[ridx]; + sum += hist[widx]; + if (hist[widx] > max) + max = hist[widx]; + } + + for (widx = 0; widx < samples && widx < walt_ravg_hist_size; widx++) { + hist[widx] = runtime; + sum += hist[widx]; + if (hist[widx] > max) + max = hist[widx]; + } + + p->ravg.sum = 0; + + if (walt_window_stats_policy == WINDOW_STATS_RECENT) { + demand = runtime; + } else if (walt_window_stats_policy == WINDOW_STATS_MAX) { + demand = max; + } else { + avg = div64_u64(sum, walt_ravg_hist_size); + if (walt_window_stats_policy == WINDOW_STATS_AVG) + demand = avg; + else + demand = max(avg, runtime); + } + + /* + * A throttled deadline sched class task gets dequeued without + * changing p->on_rq. Since the dequeue decrements hmp stats + * avoid decrementing it here again. + */ + if (task_on_rq_queued(p) && (!task_has_dl_policy(p) || + !p->dl.dl_throttled)) + fixup_cumulative_runnable_avg(rq, p, demand); + + p->ravg.demand = demand; + +done: + trace_walt_update_history(rq, p, runtime, samples, event); + return; +} + +static void add_to_task_demand(struct rq *rq, struct task_struct *p, + u64 delta) +{ + delta = scale_exec_time(delta, rq); + p->ravg.sum += delta; + if (unlikely(p->ravg.sum > walt_ravg_window)) + p->ravg.sum = walt_ravg_window; +} + +/* + * Account cpu demand of task and/or update task's cpu demand history + * + * ms = p->ravg.mark_start; + * wc = wallclock + * ws = rq->window_start + * + * Three possibilities: + * + * a) Task event is contained within one window. + * window_start < mark_start < wallclock + * + * ws ms wc + * | | | + * V V V + * |---------------| + * + * In this case, p->ravg.sum is updated *iff* event is appropriate + * (ex: event == PUT_PREV_TASK) + * + * b) Task event spans two windows. + * mark_start < window_start < wallclock + * + * ms ws wc + * | | | + * V V V + * -----|------------------- + * + * In this case, p->ravg.sum is updated with (ws - ms) *iff* event + * is appropriate, then a new window sample is recorded followed + * by p->ravg.sum being set to (wc - ws) *iff* event is appropriate. + * + * c) Task event spans more than two windows. + * + * ms ws_tmp ws wc + * | | | | + * V V V V + * ---|-------|-------|-------|-------|------ + * | | + * |<------ nr_full_windows ------>| + * + * In this case, p->ravg.sum is updated with (ws_tmp - ms) first *iff* + * event is appropriate, window sample of p->ravg.sum is recorded, + * 'nr_full_window' samples of window_size is also recorded *iff* + * event is appropriate and finally p->ravg.sum is set to (wc - ws) + * *iff* event is appropriate. + * + * IMPORTANT : Leave p->ravg.mark_start unchanged, as update_cpu_busy_time() + * depends on it! + */ +static void update_task_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock) +{ + u64 mark_start = p->ravg.mark_start; + u64 delta, window_start = rq->window_start; + int new_window, nr_full_windows; + u32 window_size = walt_ravg_window; + + new_window = mark_start < window_start; + if (!account_busy_for_task_demand(p, event)) { + if (new_window) + /* If the time accounted isn't being accounted as + * busy time, and a new window started, only the + * previous window need be closed out with the + * pre-existing demand. Multiple windows may have + * elapsed, but since empty windows are dropped, + * it is not necessary to account those. */ + update_history(rq, p, p->ravg.sum, 1, event); + return; + } + + if (!new_window) { + /* The simple case - busy time contained within the existing + * window. */ + add_to_task_demand(rq, p, wallclock - mark_start); + return; + } + + /* Busy time spans at least two windows. Temporarily rewind + * window_start to first window boundary after mark_start. */ + delta = window_start - mark_start; + nr_full_windows = div64_u64(delta, window_size); + window_start -= (u64)nr_full_windows * (u64)window_size; + + /* Process (window_start - mark_start) first */ + add_to_task_demand(rq, p, window_start - mark_start); + + /* Push new sample(s) into task's demand history */ + update_history(rq, p, p->ravg.sum, 1, event); + if (nr_full_windows) + update_history(rq, p, scale_exec_time(window_size, rq), + nr_full_windows, event); + + /* Roll window_start back to current to process any remainder + * in current window. */ + window_start += (u64)nr_full_windows * (u64)window_size; + + /* Process (wallclock - window_start) next */ + mark_start = window_start; + add_to_task_demand(rq, p, wallclock - mark_start); +} + +/* Reflect task activity on its demand and cpu's busy time statistics */ +void walt_update_task_ravg(struct task_struct *p, struct rq *rq, + int event, u64 wallclock, u64 irqtime) +{ + if (walt_disabled || !rq->window_start) + return; + + lockdep_assert_held(&rq->lock); + + update_window_start(rq, wallclock); + + if (!p->ravg.mark_start) + goto done; + + update_task_demand(p, rq, event, wallclock); + update_cpu_busy_time(p, rq, event, wallclock, irqtime); + +done: + trace_walt_update_task_ravg(p, rq, event, wallclock, irqtime); + + p->ravg.mark_start = wallclock; +} + +unsigned long __weak arch_get_cpu_efficiency(int cpu) +{ + return SCHED_CAPACITY_SCALE; +} + +void walt_init_cpu_efficiency(void) +{ + int i, efficiency; + unsigned int max = 0, min = UINT_MAX; + + for_each_possible_cpu(i) { + efficiency = arch_get_cpu_efficiency(i); + cpu_rq(i)->efficiency = efficiency; + + if (efficiency > max) + max = efficiency; + if (efficiency < min) + min = efficiency; + } + + if (max) + max_possible_efficiency = max; + + if (min) + min_possible_efficiency = min; +} + +static void reset_task_stats(struct task_struct *p) +{ + u32 sum = 0; + + if (exiting_task(p)) + sum = EXITING_TASK_MARKER; + + memset(&p->ravg, 0, sizeof(struct ravg)); + /* Retain EXITING_TASK marker */ + p->ravg.sum_history[0] = sum; +} + +void walt_mark_task_starting(struct task_struct *p) +{ + u64 wallclock; + struct rq *rq = task_rq(p); + + if (!rq->window_start) { + reset_task_stats(p); + return; + } + + wallclock = walt_ktime_clock(); + p->ravg.mark_start = wallclock; +} + +void walt_set_window_start(struct rq *rq) +{ + int cpu = cpu_of(rq); + struct rq *sync_rq = cpu_rq(sync_cpu); + + if (rq->window_start) + return; + + if (cpu == sync_cpu) { + rq->window_start = walt_ktime_clock(); + } else { + raw_spin_unlock(&rq->lock); + double_rq_lock(rq, sync_rq); + rq->window_start = cpu_rq(sync_cpu)->window_start; + rq->curr_runnable_sum = rq->prev_runnable_sum = 0; + raw_spin_unlock(&sync_rq->lock); + } + + rq->curr->ravg.mark_start = rq->window_start; +} + +void walt_migrate_sync_cpu(int cpu) +{ + if (cpu == sync_cpu) + sync_cpu = smp_processor_id(); +} + +void walt_fixup_busy_time(struct task_struct *p, int new_cpu) +{ + struct rq *src_rq = task_rq(p); + struct rq *dest_rq = cpu_rq(new_cpu); + u64 wallclock; + + if (!p->on_rq && p->state != TASK_WAKING) + return; + + if (exiting_task(p)) { + return; + } + + if (p->state == TASK_WAKING) + double_rq_lock(src_rq, dest_rq); + + wallclock = walt_ktime_clock(); + + walt_update_task_ravg(task_rq(p)->curr, task_rq(p), + TASK_UPDATE, wallclock, 0); + walt_update_task_ravg(dest_rq->curr, dest_rq, + TASK_UPDATE, wallclock, 0); + + walt_update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0); + + if (p->ravg.curr_window) { + src_rq->curr_runnable_sum -= p->ravg.curr_window; + dest_rq->curr_runnable_sum += p->ravg.curr_window; + } + + if (p->ravg.prev_window) { + src_rq->prev_runnable_sum -= p->ravg.prev_window; + dest_rq->prev_runnable_sum += p->ravg.prev_window; + } + + if ((s64)src_rq->prev_runnable_sum < 0) { + src_rq->prev_runnable_sum = 0; + WARN_ON(1); + } + if ((s64)src_rq->curr_runnable_sum < 0) { + src_rq->curr_runnable_sum = 0; + WARN_ON(1); + } + + trace_walt_migration_update_sum(src_rq, p); + trace_walt_migration_update_sum(dest_rq, p); + + if (p->state == TASK_WAKING) + double_rq_unlock(src_rq, dest_rq); +} + +/* Keep track of max/min capacity possible across CPUs "currently" */ +static void __update_min_max_capacity(void) +{ + int i; + int max = 0, min = INT_MAX; + + for_each_online_cpu(i) { + if (cpu_rq(i)->capacity > max) + max = cpu_rq(i)->capacity; + if (cpu_rq(i)->capacity < min) + min = cpu_rq(i)->capacity; + } + + max_capacity = max; + min_capacity = min; +} + +static void update_min_max_capacity(void) +{ + unsigned long flags; + int i; + + local_irq_save(flags); + for_each_possible_cpu(i) + raw_spin_lock(&cpu_rq(i)->lock); + + __update_min_max_capacity(); + + for_each_possible_cpu(i) + raw_spin_unlock(&cpu_rq(i)->lock); + local_irq_restore(flags); +} + +/* + * Return 'capacity' of a cpu in reference to "least" efficient cpu, such that + * least efficient cpu gets capacity of 1024 + */ +static unsigned long capacity_scale_cpu_efficiency(int cpu) +{ + return (1024 * cpu_rq(cpu)->efficiency) / min_possible_efficiency; +} + +/* + * Return 'capacity' of a cpu in reference to cpu with lowest max_freq + * (min_max_freq), such that one with lowest max_freq gets capacity of 1024. + */ +static unsigned long capacity_scale_cpu_freq(int cpu) +{ + return (1024 * cpu_rq(cpu)->max_freq) / min_max_freq; +} + +/* + * Return load_scale_factor of a cpu in reference to "most" efficient cpu, so + * that "most" efficient cpu gets a load_scale_factor of 1 + */ +static unsigned long load_scale_cpu_efficiency(int cpu) +{ + return DIV_ROUND_UP(1024 * max_possible_efficiency, + cpu_rq(cpu)->efficiency); +} + +/* + * Return load_scale_factor of a cpu in reference to cpu with best max_freq + * (max_possible_freq), so that one with best max_freq gets a load_scale_factor + * of 1. + */ +static unsigned long load_scale_cpu_freq(int cpu) +{ + return DIV_ROUND_UP(1024 * max_possible_freq, cpu_rq(cpu)->max_freq); +} + +static int compute_capacity(int cpu) +{ + int capacity = 1024; + + capacity *= capacity_scale_cpu_efficiency(cpu); + capacity >>= 10; + + capacity *= capacity_scale_cpu_freq(cpu); + capacity >>= 10; + + return capacity; +} + +static int compute_load_scale_factor(int cpu) +{ + int load_scale = 1024; + + /* + * load_scale_factor accounts for the fact that task load + * is in reference to "best" performing cpu. Task's load will need to be + * scaled (up) by a factor to determine suitability to be placed on a + * (little) cpu. + */ + load_scale *= load_scale_cpu_efficiency(cpu); + load_scale >>= 10; + + load_scale *= load_scale_cpu_freq(cpu); + load_scale >>= 10; + + return load_scale; +} + +static int cpufreq_notifier_policy(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_policy *policy = (struct cpufreq_policy *)data; + int i, update_max = 0; + u64 highest_mpc = 0, highest_mplsf = 0; + const struct cpumask *cpus = policy->related_cpus; + unsigned int orig_min_max_freq = min_max_freq; + unsigned int orig_max_possible_freq = max_possible_freq; + /* Initialized to policy->max in case policy->related_cpus is empty! */ + unsigned int orig_max_freq = policy->max; + + if (val != CPUFREQ_NOTIFY && val != CPUFREQ_REMOVE_POLICY && + val != CPUFREQ_CREATE_POLICY) + return 0; + + if (val == CPUFREQ_REMOVE_POLICY || val == CPUFREQ_CREATE_POLICY) { + update_min_max_capacity(); + return 0; + } + + for_each_cpu(i, policy->related_cpus) { + cpumask_copy(&cpu_rq(i)->freq_domain_cpumask, + policy->related_cpus); + orig_max_freq = cpu_rq(i)->max_freq; + cpu_rq(i)->min_freq = policy->min; + cpu_rq(i)->max_freq = policy->max; + cpu_rq(i)->cur_freq = policy->cur; + cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq; + } + + max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq); + if (min_max_freq == 1) + min_max_freq = UINT_MAX; + min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq); + BUG_ON(!min_max_freq); + BUG_ON(!policy->max); + + /* Changes to policy other than max_freq don't require any updates */ + if (orig_max_freq == policy->max) + return 0; + + /* + * A changed min_max_freq or max_possible_freq (possible during bootup) + * needs to trigger re-computation of load_scale_factor and capacity for + * all possible cpus (even those offline). It also needs to trigger + * re-computation of nr_big_task count on all online cpus. + * + * A changed rq->max_freq otoh needs to trigger re-computation of + * load_scale_factor and capacity for just the cluster of cpus involved. + * Since small task definition depends on max_load_scale_factor, a + * changed load_scale_factor of one cluster could influence + * classification of tasks in another cluster. Hence a changed + * rq->max_freq will need to trigger re-computation of nr_big_task + * count on all online cpus. + * + * While it should be sufficient for nr_big_tasks to be + * re-computed for only online cpus, we have inadequate context + * information here (in policy notifier) with regard to hotplug-safety + * context in which notification is issued. As a result, we can't use + * get_online_cpus() here, as it can lead to deadlock. Until cpufreq is + * fixed up to issue notification always in hotplug-safe context, + * re-compute nr_big_task for all possible cpus. + */ + + if (orig_min_max_freq != min_max_freq || + orig_max_possible_freq != max_possible_freq) { + cpus = cpu_possible_mask; + update_max = 1; + } + + /* + * Changed load_scale_factor can trigger reclassification of tasks as + * big or small. Make this change "atomic" so that tasks are accounted + * properly due to changed load_scale_factor + */ + for_each_cpu(i, cpus) { + struct rq *rq = cpu_rq(i); + + rq->capacity = compute_capacity(i); + rq->load_scale_factor = compute_load_scale_factor(i); + + if (update_max) { + u64 mpc, mplsf; + + mpc = div_u64(((u64) rq->capacity) * + rq->max_possible_freq, rq->max_freq); + rq->max_possible_capacity = (int) mpc; + + mplsf = div_u64(((u64) rq->load_scale_factor) * + rq->max_possible_freq, rq->max_freq); + + if (mpc > highest_mpc) { + highest_mpc = mpc; + cpumask_clear(&mpc_mask); + cpumask_set_cpu(i, &mpc_mask); + } else if (mpc == highest_mpc) { + cpumask_set_cpu(i, &mpc_mask); + } + + if (mplsf > highest_mplsf) + highest_mplsf = mplsf; + } + } + + if (update_max) { + max_possible_capacity = highest_mpc; + max_load_scale_factor = highest_mplsf; + } + + __update_min_max_capacity(); + + return 0; +} + +static int cpufreq_notifier_trans(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data; + unsigned int cpu = freq->cpu, new_freq = freq->new; + unsigned long flags; + int i; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + BUG_ON(!new_freq); + + if (cpu_rq(cpu)->cur_freq == new_freq) + return 0; + + for_each_cpu(i, &cpu_rq(cpu)->freq_domain_cpumask) { + struct rq *rq = cpu_rq(i); + + raw_spin_lock_irqsave(&rq->lock, flags); + walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, + walt_ktime_clock(), 0); + rq->cur_freq = new_freq; + raw_spin_unlock_irqrestore(&rq->lock, flags); + } + + return 0; +} + +static struct notifier_block notifier_policy_block = { + .notifier_call = cpufreq_notifier_policy +}; + +static struct notifier_block notifier_trans_block = { + .notifier_call = cpufreq_notifier_trans +}; + +static int register_sched_callback(void) +{ + int ret; + + ret = cpufreq_register_notifier(¬ifier_policy_block, + CPUFREQ_POLICY_NOTIFIER); + + if (!ret) + ret = cpufreq_register_notifier(¬ifier_trans_block, + CPUFREQ_TRANSITION_NOTIFIER); + + return 0; +} + +/* + * cpufreq callbacks can be registered at core_initcall or later time. + * Any registration done prior to that is "forgotten" by cpufreq. See + * initialization of variable init_cpufreq_transition_notifier_list_called + * for further information. + */ +core_initcall(register_sched_callback); + +void walt_init_new_task_load(struct task_struct *p) +{ + int i; + u32 init_load_windows = + div64_u64((u64)sysctl_sched_walt_init_task_load_pct * + (u64)walt_ravg_window, 100); + u32 init_load_pct = current->init_load_pct; + + p->init_load_pct = 0; + memset(&p->ravg, 0, sizeof(struct ravg)); + + if (init_load_pct) { + init_load_windows = div64_u64((u64)init_load_pct * + (u64)walt_ravg_window, 100); + } + + p->ravg.demand = init_load_windows; + for (i = 0; i < RAVG_HIST_SIZE_MAX; ++i) + p->ravg.sum_history[i] = init_load_windows; +} diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h new file mode 100644 index 000000000000..cabc193a683d --- /dev/null +++ b/kernel/sched/walt.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __WALT_H +#define __WALT_H + +#ifdef CONFIG_SCHED_WALT + +void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event, + u64 wallclock, u64 irqtime); +void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p); +void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p); +void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq, + struct task_struct *p); +void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq, + struct task_struct *p); +void walt_fixup_busy_time(struct task_struct *p, int new_cpu); +void walt_init_new_task_load(struct task_struct *p); +void walt_mark_task_starting(struct task_struct *p); +void walt_set_window_start(struct rq *rq); +void walt_migrate_sync_cpu(int cpu); +void walt_init_cpu_efficiency(void); +u64 walt_ktime_clock(void); + +#else /* CONFIG_SCHED_WALT */ + +static inline void walt_update_task_ravg(struct task_struct *p, struct rq *rq, + int event, u64 wallclock, u64 irqtime) { } +static inline void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { } +static inline void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { } +static inline void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq, + struct task_struct *p) { } +static inline void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq, + struct task_struct *p) { } +static inline void walt_fixup_busy_time(struct task_struct *p, int new_cpu) { } +static inline void walt_init_new_task_load(struct task_struct *p) { } +static inline void walt_mark_task_starting(struct task_struct *p) { } +static inline void walt_set_window_start(struct rq *rq) { } +static inline void walt_migrate_sync_cpu(int cpu) { } +static inline void walt_init_cpu_efficiency(void) { } +static inline u64 walt_ktime_clock(void) { return 0; } + +#endif /* CONFIG_SCHED_WALT */ + +extern unsigned int walt_disabled; + +#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2ed4bf05045e..bd4ec6864ecb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -314,6 +314,29 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_SCHED_WALT + { + .procname = "sched_use_walt_cpu_util", + .data = &sysctl_sched_use_walt_cpu_util, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_use_walt_task_util", + .data = &sysctl_sched_use_walt_task_util, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sched_walt_init_task_load_pct", + .data = &sysctl_sched_walt_init_task_load_pct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif { .procname = "sched_sync_hint_enable", .data = &sysctl_sched_sync_hint_enable, -- GitLab From 3a73c96a286f1014aa34e07e77c8670ec1c61448 Mon Sep 17 00:00:00 2001 From: Srinath Sridharan Date: Fri, 22 Jul 2016 13:21:15 +0100 Subject: [PATCH 1187/1442] ANDROID: sched/walt: Accounting for number of irqs pending on each core Schedules on a core whose irq count is less than a threshold. Improves I/O performance of EAS. Change-Id: I08ff7dd0d22502a0106fc636b1af2e6fe9e758b5 Signed-off-by: Andres Oportus --- include/linux/sched/sysctl.h | 1 + kernel/sched/core.c | 5 +++ kernel/sched/cputime.c | 17 +++++++++- kernel/sched/fair.c | 7 +++- kernel/sched/sched.h | 3 ++ kernel/sched/walt.c | 65 ++++++++++++++++++++++++++++++++++++ kernel/sched/walt.h | 5 +++ kernel/sysctl.c | 7 ++++ 8 files changed, 108 insertions(+), 2 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 9e82e3ada1fc..264a2208d737 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -26,6 +26,7 @@ extern unsigned int sysctl_sched_cstate_aware; extern unsigned int sysctl_sched_use_walt_cpu_util; extern unsigned int sysctl_sched_use_walt_task_util; extern unsigned int sysctl_sched_walt_init_task_load_pct; +extern unsigned int sysctl_sched_walt_cpu_high_irqload; #endif enum sched_tunable_scaling { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1516c867876d..dfd0f877322b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7842,6 +7842,11 @@ void __init sched_init(void) rq->idle_stamp = 0; rq->avg_idle = 2*sysctl_sched_migration_cost; rq->max_idle_balance_cost = sysctl_sched_migration_cost; +#ifdef CONFIG_SCHED_WALT + rq->cur_irqload = 0; + rq->avg_irqload = 0; + rq->irqload_ts = 0; +#endif INIT_LIST_HEAD(&rq->cfs_tasks); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5ebee3164e64..5e9422b66f5f 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -8,7 +8,7 @@ #ifdef CONFIG_PARAVIRT #include #endif - +#include "walt.h" #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -46,11 +46,18 @@ void irqtime_account_irq(struct task_struct *curr) struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); s64 delta; int cpu; +#ifdef CONFIG_SCHED_WALT + u64 wallclock; + bool account = true; +#endif if (!sched_clock_irqtime) return; cpu = smp_processor_id(); +#ifdef CONFIG_SCHED_WALT + wallclock = sched_clock_cpu(cpu); +#endif delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; irqtime->irq_start_time += delta; @@ -65,8 +72,16 @@ void irqtime_account_irq(struct task_struct *curr) irqtime->hardirq_time += delta; else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) irqtime->softirq_time += delta; +#ifdef CONFIG_SCHED_WALT + else + account = false; +#endif u64_stats_update_end(&irqtime->sync); +#ifdef CONFIG_SCHED_WALT + if (account) + walt_account_irqtime(cpu, curr, delta, wallclock); +#endif } EXPORT_SYMBOL_GPL(irqtime_account_irq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 44ab02aad5ea..cf7b009d8311 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -61,6 +61,8 @@ unsigned int sysctl_sched_cstate_aware = 1; #ifdef CONFIG_SCHED_WALT unsigned int sysctl_sched_use_walt_cpu_util = 1; unsigned int sysctl_sched_use_walt_task_util = 1; +__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload = + (10 * NSEC_PER_MSEC); #endif /* * The initial- and re-scaling of tunables is configurable @@ -4653,7 +4655,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) schedtune_enqueue_task(p, cpu_of(rq)); #endif /* CONFIG_SMP */ - hrtick_update(rq); } @@ -6327,6 +6328,10 @@ static inline int find_best_target(struct task_struct *p, bool boosted) if (new_util > capacity_orig_of(i)) continue; +#ifdef CONFIG_SCHED_WALT + if (walt_cpu_high_irqload(i)) + continue; +#endif /* * For boosted tasks we favor idle cpus unconditionally to * improve latency. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 8c7bd8e60970..e4b4e01d1368 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -713,6 +713,9 @@ struct rq { u64 prev_runnable_sum; u64 nt_curr_runnable_sum; u64 nt_prev_runnable_sum; + u64 cur_irqload; + u64 avg_irqload; + u64 irqload_ts; #endif /* CONFIG_SCHED_WALT */ diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index baab741a2a33..937efc7f23df 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -221,6 +221,71 @@ static int cpu_is_waiting_on_io(struct rq *rq) return atomic_read(&rq->nr_iowait); } +void walt_account_irqtime(int cpu, struct task_struct *curr, + u64 delta, u64 wallclock) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags, nr_windows; + u64 cur_jiffies_ts; + + raw_spin_lock_irqsave(&rq->lock, flags); + + /* + * cputime (wallclock) uses sched_clock so use the same here for + * consistency. + */ + delta += sched_clock() - wallclock; + cur_jiffies_ts = get_jiffies_64(); + + if (is_idle_task(curr)) + walt_update_task_ravg(curr, rq, IRQ_UPDATE, walt_ktime_clock(), + delta); + + nr_windows = cur_jiffies_ts - rq->irqload_ts; + + if (nr_windows) { + if (nr_windows < 10) { + /* Decay CPU's irqload by 3/4 for each window. */ + rq->avg_irqload *= (3 * nr_windows); + rq->avg_irqload = div64_u64(rq->avg_irqload, + 4 * nr_windows); + } else { + rq->avg_irqload = 0; + } + rq->avg_irqload += rq->cur_irqload; + rq->cur_irqload = 0; + } + + rq->cur_irqload += delta; + rq->irqload_ts = cur_jiffies_ts; + raw_spin_unlock_irqrestore(&rq->lock, flags); +} + + +#define WALT_HIGH_IRQ_TIMEOUT 3 + +u64 walt_irqload(int cpu) { + struct rq *rq = cpu_rq(cpu); + s64 delta; + delta = get_jiffies_64() - rq->irqload_ts; + + /* + * Current context can be preempted by irq and rq->irqload_ts can be + * updated by irq context so that delta can be negative. + * But this is okay and we can safely return as this means there + * was recent irq occurrence. + */ + + if (delta < WALT_HIGH_IRQ_TIMEOUT) + return rq->avg_irqload; + else + return 0; +} + +int walt_cpu_high_irqload(int cpu) { + return walt_irqload(cpu) >= sysctl_sched_walt_cpu_high_irqload; +} + static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p, u64 irqtime, int event) { diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index cabc193a683d..e181c87a928d 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -31,6 +31,11 @@ void walt_set_window_start(struct rq *rq); void walt_migrate_sync_cpu(int cpu); void walt_init_cpu_efficiency(void); u64 walt_ktime_clock(void); +void walt_account_irqtime(int cpu, struct task_struct *curr, u64 delta, + u64 wallclock); + +u64 walt_irqload(int cpu); +int walt_cpu_high_irqload(int cpu); #else /* CONFIG_SCHED_WALT */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bd4ec6864ecb..6e3631d7dfc5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -336,6 +336,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sched_walt_cpu_high_irqload", + .data = &sysctl_sched_walt_cpu_high_irqload, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif { .procname = "sched_sync_hint_enable", -- GitLab From c4eef1f760b0973ad9b6fbcf6bc5237fc41dc20b Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 30 Jun 2016 15:00:41 +0100 Subject: [PATCH 1188/1442] ANDROID: FIXUP: sched: fix set_cfs_cpu_capacity when WALT is in use The CPU utilization reported when WALT is in use already tracks the contributions due to RT and DL workloads. However, SchedFreq exposes different capacity update functions, one for each class, and does classes utilization internally at update_cpu_capacity_request() call time. This patch ensures that when WALT is in use, the: cpu_sched_capacity_reqs::cfs value is tracking just the load generated by SCHED_OTHER tasks. Change-Id: Ibd9c9a10874a1d91f62477034548f7664e57cd6a Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/sched.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e4b4e01d1368..c61e0cc01b95 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1674,8 +1674,27 @@ void update_cpu_capacity_request(int cpu, bool request); static inline void set_cfs_cpu_capacity(int cpu, bool request, unsigned long capacity) { - if (per_cpu(cpu_sched_capacity_reqs, cpu).cfs != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).cfs = capacity; + struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { + int rtdl = scr->rt + scr->dl; + /* + * WALT tracks the utilization of a CPU considering the load + * generated by all the scheduling classes. + * Since the following call to: + * update_cpu_capacity + * is already adding the RT and DL utilizations let's remove + * these contributions from the WALT signal. + */ + if (capacity > rtdl) + capacity -= rtdl; + else + capacity = 0; + } +#endif + if (scr->cfs != capacity) { + scr->cfs = capacity; update_cpu_capacity_request(cpu, request); } } -- GitLab From d4e742025e58ef022b373aace719de3979b94b55 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 29 Jun 2016 11:30:07 -0700 Subject: [PATCH 1189/1442] ANDROID: sched: EAS: Avoid causing spikes to max-freq unnecessarily During scheduler tick handling, the frequency was being set to max-freq if the current frequency is less than the current utilization. Change to just request "right" frequency instead of max. BUG: 29871410 Change-Id: I6fe65b14413da44b1520ba116f72320083eb92f8 Signed-off-by: Andres Oportus --- kernel/sched/core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dfd0f877322b..1e2712fa8aaf 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3153,7 +3153,7 @@ static unsigned long sum_capacity_reqs(unsigned long cfs_cap, static void sched_freq_tick(int cpu) { struct sched_capacity_reqs *scr; - unsigned long capacity_orig, capacity_curr; + unsigned long capacity_orig, capacity_curr, capacity_sum; if (!sched_freq()) return; @@ -3166,12 +3166,15 @@ static void sched_freq_tick(int cpu) /* * To make free room for a task that is building up its "real" * utilization and to harm its performance the least, request - * a jump to max OPP as soon as the margin of free capacity is - * impacted (specified by capacity_margin). + * a jump to a higher OPP as soon as the margin of free capacity + * is impacted (specified by capacity_margin). */ + scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - if (capacity_curr < sum_capacity_reqs(cpu_util(cpu), scr)) - set_cfs_cpu_capacity(cpu, true, capacity_max); + capacity_sum = sum_capacity_reqs(cpu_util(cpu), scr); + if (capacity_curr < capacity_sum) { + set_cfs_cpu_capacity(cpu, true, capacity_sum); + } } #else static inline void sched_freq_tick(int cpu) { } -- GitLab From 3480e6bc06d4adb86ca72ab1c32d0849174b7e1e Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 30 Jun 2016 15:09:24 +0100 Subject: [PATCH 1190/1442] ANDROID: FIXUP: sched: fix SchedFreq integration for both PELT and WALT The current kernel allows to use either PELT or WALT to track CPUs utilizations. One of the main differences between the two approaches is that PELT tracks only utilization of SCHED_OTHER classes while WALT tracks all tasks with a single signal. The current sched_freq_tick does not make this distinction and, when WALT is in use, we end up adding multiple time the contribution related to the RT and DL classes. This patch fixes this issue by: 1. providing two different code paths for PELT and WALT, thus granting that when we switch to PELT we get the original behaviour based on the assumption that class aggregations is done underneath by SchedFreq. 2. avoiding the double accounting of DL and RT workloads, when WALT is in use, by just adding a margin to the original WALT signal when we need to check if the CFS capacity has to be increased. Change-Id: I7326fd50e868e97fb5e12351917e9d2969bfdae7 Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- kernel/sched/core.c | 87 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 21 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1e2712fa8aaf..b250aacec534 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3139,28 +3139,31 @@ unsigned long long task_sched_runtime(struct task_struct *p) } #ifdef CONFIG_CPU_FREQ_GOV_SCHED -static unsigned long sum_capacity_reqs(unsigned long cfs_cap, - struct sched_capacity_reqs *scr) + +static inline +unsigned long add_capacity_margin(unsigned long cpu_capacity) { - unsigned long total = cfs_cap + scr->rt; + cpu_capacity = cpu_capacity * capacity_margin; + cpu_capacity /= SCHED_CAPACITY_SCALE; + return cpu_capacity; +} - total = total * capacity_margin; - total /= SCHED_CAPACITY_SCALE; - total += scr->dl; - return total; +static inline +unsigned long sum_capacity_reqs(unsigned long cfs_cap, + struct sched_capacity_reqs *scr) +{ + unsigned long total = add_capacity_margin(cfs_cap + scr->rt); + return total += scr->dl; } -static void sched_freq_tick(int cpu) +static void sched_freq_tick_pelt(int cpu) { + unsigned long cpu_utilization = capacity_max; + unsigned long capacity_curr = capacity_curr_of(cpu); struct sched_capacity_reqs *scr; - unsigned long capacity_orig, capacity_curr, capacity_sum; - if (!sched_freq()) - return; - - capacity_orig = capacity_orig_of(cpu); - capacity_curr = capacity_curr_of(cpu); - if (capacity_curr == capacity_orig) + scr = &per_cpu(cpu_sched_capacity_reqs, cpu); + if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr) return; /* @@ -3169,16 +3172,58 @@ static void sched_freq_tick(int cpu) * a jump to a higher OPP as soon as the margin of free capacity * is impacted (specified by capacity_margin). */ + set_cfs_cpu_capacity(cpu, true, cpu_utilization); +} + +#ifdef CONFIG_SCHED_WALT +static void sched_freq_tick_walt(int cpu) +{ + unsigned long cpu_utilization = cpu_util(cpu); + unsigned long capacity_curr = capacity_curr_of(cpu); + + if (walt_disabled || !sysctl_sched_use_walt_cpu_util) + return sched_freq_tick_pelt(cpu); + + /* + * Add a margin to the WALT utilization. + * NOTE: WALT tracks a single CPU signal for all the scheduling + * classes, thus this margin is going to be added to the DL class as + * well, which is something we do not do in sched_freq_tick_pelt case. + */ + cpu_utilization = add_capacity_margin(cpu_utilization); + if (cpu_utilization <= capacity_curr) + return; + + /* + * It is likely that the load is growing so we + * keep the added margin in our request as an + * extra boost. + */ + set_cfs_cpu_capacity(cpu, true, cpu_utilization); - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - capacity_sum = sum_capacity_reqs(cpu_util(cpu), scr); - if (capacity_curr < capacity_sum) { - set_cfs_cpu_capacity(cpu, true, capacity_sum); - } +} +#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu) +#else +#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu) +#endif /* CONFIG_SCHED_WALT */ + +static void sched_freq_tick(int cpu) +{ + unsigned long capacity_orig, capacity_curr; + + if (!sched_freq()) + return; + + capacity_orig = capacity_orig_of(cpu); + capacity_curr = capacity_curr_of(cpu); + if (capacity_curr == capacity_orig) + return; + + _sched_freq_tick(cpu); } #else static inline void sched_freq_tick(int cpu) { } -#endif +#endif /* CONFIG_CPU_FREQ_GOV_SCHED */ /* * This function gets called by the timer code, with HZ frequency. -- GitLab From 64f6fd103dadaf762d3dc11173c5544ff6ec1b16 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Thu, 16 Jun 2016 16:33:54 -0700 Subject: [PATCH 1191/1442] ANDROID: FIXUP: sched/fair: Fix hang during suspend in sched_group_energy BUG: 29353986 Change-Id: I0d0d8d5c107a2e0bd219819e036091106bb40e11 Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cf7b009d8311..370aeb6377cc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5436,6 +5436,7 @@ static int sched_group_energy(struct energy_env *eenv) } while (sg = sg->next, sg != sd->groups); } next_cpu: + cpumask_clear_cpu(cpu, &visit_cpus); continue; } -- GitLab From a31778a06121f271b996a011da83d7737ba020c7 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 4 Jul 2016 15:04:45 +0100 Subject: [PATCH 1192/1442] ANDROID: FIXUP: sched: Fix double-release of spinlock in move_queued_task BUG: 29519455 Change-Id: I4d1c27a1b4bcbba03d4b175d170cfe1701a90ffd Signed-off-by: Andres Oportus --- kernel/sched/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c61e0cc01b95..d2cdc3aaf9d9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1841,7 +1841,8 @@ static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) __releases(busiest->lock) { - raw_spin_unlock(&busiest->lock); + if (this_rq != busiest) + raw_spin_unlock(&busiest->lock); lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); } -- GitLab From b5e1207658f01946f9c6907a1bf694853dab08d7 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 25 Jul 2016 15:13:58 +0100 Subject: [PATCH 1193/1442] ANDROID: arch_timer: add error handling when the MPM global timer is cleared Bug: 29000863 Signed-off-by: albert.zl_huang Change-Id: I2b5a28b0a9edb31bdaa1ca2310397dd2f36f6c23 Updated to use arch_timer_read_counter() as arch_counter_get_cntvct doesn't exist in this kernel. Signed-off-by: Chris Redpath Signed-off-by: Andres Oportus --- kernel/sched/walt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 937efc7f23df..2bc5c1fdcd23 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -185,7 +185,14 @@ update_window_start(struct rq *rq, u64 wallclock) int nr_windows; delta = wallclock - rq->window_start; - BUG_ON(delta < 0); + /* If the MPM global timer is cleared, set delta as 0 to avoid kernel BUG happening */ + if (delta < 0) { + if (arch_timer_read_counter() == 0) + delta = 0; + else + BUG_ON(1); + } + if (delta < walt_ravg_window) return; -- GitLab From bd7ee31d807e9d4480a40fa730f7f57c96041314 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 13 Jul 2016 16:13:47 -0700 Subject: [PATCH 1194/1442] ANDROID: sched: use util instead of capacity to select busy cpu If cpus are busy, the cpu selection algorithm was favoring cpus with lower capacity. This can result in uneven packing since there will be a bias toward the same cpu until there is a capacity change. Instead use the utilization so there is immediate feedback as tasks are assigned BUG: 30115868 Change-Id: I0ac7ae3ab5d8f2f5a5838c29bb6da2c3e8ef44e8 Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 370aeb6377cc..23047b2738cd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6293,7 +6293,7 @@ static inline int find_best_target(struct task_struct *p, bool boosted) { int iter_cpu; int target_cpu = -1; - int target_capacity = 0; + int target_util = 0; int backup_capacity = 0; int best_idle_cpu = -1; int best_idle_cstate = INT_MAX; @@ -6349,10 +6349,10 @@ static inline int find_best_target(struct task_struct *p, bool boosted) if (new_util < cur_capacity) { if (cpu_rq(i)->nr_running) { - if (target_capacity == 0 || - target_capacity > cur_capacity) { + if (target_util == 0 || + target_util > new_util) { target_cpu = i; - target_capacity = cur_capacity; + target_util = new_util; } } else if (!boosted) { if (best_idle_cpu < 0 || -- GitLab From 42503db7cabce1d197cb02dea3ef02ea93fc4f74 Mon Sep 17 00:00:00 2001 From: Srinath Sridharan Date: Thu, 14 Jul 2016 13:09:03 -0700 Subject: [PATCH 1195/1442] ANDROID: sched/tune: Introducing a new schedtune attribute prefer_idle Hint to enable biasing of tasks towards idle cpus, even when a given task is negatively boosted. The mechanism allows upto 20% reduction in camera power without hurting performance. bug: 28312446 Change-Id: I97ea5671aa1e6bcb165408b41e17bc82e41c2c9e Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 23 +++++++++++++---------- kernel/sched/tune.c | 42 ++++++++++++++++++++++++++++++++++++++++++ kernel/sched/tune.h | 2 ++ 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 23047b2738cd..9535cc2e2eb1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6289,7 +6289,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } -static inline int find_best_target(struct task_struct *p, bool boosted) +static inline int find_best_target(struct task_struct *p, bool prefer_idle) { int iter_cpu; int target_cpu = -1; @@ -6307,9 +6307,9 @@ static inline int find_best_target(struct task_struct *p, bool boosted) int idle_idx; /* - * favor higher cpus for boosted tasks + * favor higher cpus for tasks that prefer idle cores */ - int i = boosted ? NR_CPUS-iter_cpu-1 : iter_cpu; + int i = prefer_idle ? NR_CPUS-iter_cpu-1 : iter_cpu; if (!cpu_online(i) || !cpumask_test_cpu(i, tsk_cpus_allowed(p))) continue; @@ -6334,10 +6334,10 @@ static inline int find_best_target(struct task_struct *p, bool boosted) continue; #endif /* - * For boosted tasks we favor idle cpus unconditionally to + * Unconditionally favoring tasks that prefer idle cpus to * improve latency. */ - if (idle_cpu(i) && boosted) { + if (idle_cpu(i) && prefer_idle) { if (best_idle_cpu < 0) best_idle_cpu = i; continue; @@ -6354,7 +6354,7 @@ static inline int find_best_target(struct task_struct *p, bool boosted) target_cpu = i; target_util = new_util; } - } else if (!boosted) { + } else if (!prefer_idle) { if (best_idle_cpu < 0 || (sysctl_sched_cstate_aware && best_idle_cstate > idle_idx)) { @@ -6369,7 +6369,7 @@ static inline int find_best_target(struct task_struct *p, bool boosted) } } - if (boosted && best_idle_cpu >= 0) + if (prefer_idle && best_idle_cpu >= 0) target_cpu = best_idle_cpu; else if (target_cpu < 0) target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu; @@ -6461,14 +6461,17 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync) */ #ifdef CONFIG_CGROUP_SCHEDTUNE bool boosted = schedtune_task_boost(p) > 0; + bool prefer_idle = schedtune_prefer_idle(p) > 0; #else bool boosted = 0; + bool prefer_idle = 0; #endif - int tmp_target = find_best_target(p, boosted); - if (tmp_target >= 0) + int tmp_target = find_best_target(p, boosted || prefer_idle); + if (tmp_target >= 0) { target_cpu = tmp_target; - if (boosted && idle_cpu(target_cpu)) + if ((boosted || prefer_idle) && idle_cpu(target_cpu)) return target_cpu; + } } if (target_cpu != task_cpu(p)) { diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index dd92a583057f..17d97f372bf7 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -125,6 +125,10 @@ struct schedtune { /* Performance Constraint (C) region threshold params */ int perf_constrain_idx; + + /* Hint to bias scheduling of tasks on that SchedTune CGroup + * towards idle CPUs */ + int prefer_idle; }; static inline struct schedtune *css_st(struct cgroup_subsys_state *css) @@ -156,6 +160,7 @@ root_schedtune = { .boost = 0, .perf_boost_idx = 0, .perf_constrain_idx = 0, + .prefer_idle = 0, }; int @@ -536,6 +541,38 @@ int schedtune_task_boost(struct task_struct *p) return task_boost; } +int schedtune_prefer_idle(struct task_struct *p) +{ + struct schedtune *st; + int prefer_idle; + + /* Get prefer_idle value */ + rcu_read_lock(); + st = task_schedtune(p); + prefer_idle = st->prefer_idle; + rcu_read_unlock(); + + return prefer_idle; +} + +static u64 +prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft) +{ + struct schedtune *st = css_st(css); + + return st->prefer_idle; +} + +static int +prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft, + u64 prefer_idle) +{ + struct schedtune *st = css_st(css); + st->prefer_idle = prefer_idle; + + return 0; +} + static s64 boost_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -587,6 +624,11 @@ static struct cftype files[] = { .read_s64 = boost_read, .write_s64 = boost_write, }, + { + .name = "prefer_idle", + .read_u64 = prefer_idle_read, + .write_u64 = prefer_idle_write, + }, { } /* terminate */ }; diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h index be1785eb1c5b..4f6441771e4c 100644 --- a/kernel/sched/tune.h +++ b/kernel/sched/tune.h @@ -17,6 +17,8 @@ struct target_nrg { int schedtune_cpu_boost(int cpu); int schedtune_task_boost(struct task_struct *tsk); +int schedtune_prefer_idle(struct task_struct *tsk); + void schedtune_exit_task(struct task_struct *tsk); void schedtune_enqueue_task(struct task_struct *p, int cpu); -- GitLab From 8e45d941282039d5379f4e286e5bd0a2044e105c Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Wed, 10 Feb 2016 09:24:36 +0000 Subject: [PATCH 1196/1442] ANDROID: DEBUG: sched: add tracepoint for RD overutilized Signed-off-by: Patrick Bellasi Signed-off-by: Andres Oportus --- include/trace/events/sched.h | 20 ++++++++++++++++++++ kernel/sched/fair.c | 17 +++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 230dc4368655..a1edd59ad81f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -912,6 +912,26 @@ TRACE_EVENT(sched_tune_filter, __entry->payoff, __entry->region) ); +/* + * Tracepoint for system overutilized flag + */ +TRACE_EVENT(sched_overutilized, + + TP_PROTO(bool overutilized), + + TP_ARGS(overutilized), + + TP_STRUCT__entry( + __field( bool, overutilized ) + ), + + TP_fast_assign( + __entry->overutilized = overutilized; + ), + + TP_printk("overutilized=%d", + __entry->overutilized ? 1 : 0) +); #ifdef CONFIG_SCHED_WALT struct rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9535cc2e2eb1..02b36ca97da6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4637,8 +4637,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (!se) { walt_inc_cumulative_runnable_avg(rq, p); if (!task_new && !rq->rd->overutilized && - cpu_overutilized(rq->cpu)) + cpu_overutilized(rq->cpu)) { rq->rd->overutilized = true; + trace_sched_overutilized(true); + } /* * We want to potentially trigger a freq switch @@ -8190,12 +8192,17 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd env->dst_rq->rd->overload = overload; /* Update over-utilization (tipping point, U >= 0) indicator */ - if (env->dst_rq->rd->overutilized != overutilized) + if (env->dst_rq->rd->overutilized != overutilized) { env->dst_rq->rd->overutilized = overutilized; + trace_sched_overutilized(overutilized); + } } else { - if (!env->dst_rq->rd->overutilized && overutilized) + if (!env->dst_rq->rd->overutilized && overutilized) { env->dst_rq->rd->overutilized = true; + trace_sched_overutilized(true); + } } + } /** @@ -9586,8 +9593,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) task_tick_numa(rq, curr); #ifdef CONFIG_SMP - if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) + if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) { rq->rd->overutilized = true; + trace_sched_overutilized(true); + } rq->misfit_task = !task_fits_max(curr, rq->cpu); #endif -- GitLab From f472539f9bf7cd8d09520d5c1b32cc2bdf4ac736 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 29 Jul 2016 16:09:03 +0100 Subject: [PATCH 1197/1442] ANDROID: FIXUP: sched/tune: do initialization as a postcore_initicall SchedTune needs to walk the scheduling domains to compute the energy normalization constants used for PE space filtering. To build such constants we need the energy model data for each CPU in the system. However, by walking the SDs as a late initcall stage, the userspace has been already initialized and it could happen that some CPUs are hotplugged out. For example, this could happen if a user-space thermal manager daemon detects that CPUs are to much hot during the boot process. To avoid such a race condition we can anticipate the SchedTune initialization code to be a postcore_initicall. This allows to keep the SchedTune initialization code as simple as an initcall while still safely relaying on SDs provided data. Such calls are executed before user-space is initialized and thus, apart from the case of unlucky early-init kernel space generated hotplugs, this solution should be safe enough to get all the data we need. Signed-off-by: Patrick Bellasi [jstultz: fwdported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 17d97f372bf7..6afc27cc722e 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -946,4 +946,4 @@ schedtune_init(void) rcu_read_unlock(); return -EINVAL; } -late_initcall(schedtune_init); +postcore_initcall(schedtune_init); -- GitLab From 5ce4e785e00f049c4a5a52c730ed90e664311605 Mon Sep 17 00:00:00 2001 From: Srinath Sridharan Date: Tue, 2 Aug 2016 14:05:46 -0700 Subject: [PATCH 1198/1442] ANDROID: sched/fair: Picking cpus with low OPPs for tasks that prefer idle CPUs When idle cpus cannot be found for Top-app/FG tasks, the cpu selection algorithm picks a cpu with lowest OPP amongst the busy cpus as a second choice. Mitigates the "runnable" time for ui and render threads. bug: 30481949 bug: 30342017 bug: 30508678 Change-Id: I5a97e31d33284895c0fa6f6942102713ee576d77 Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 02b36ca97da6..8ae4f6adabc9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6351,10 +6351,22 @@ static inline int find_best_target(struct task_struct *p, bool prefer_idle) if (new_util < cur_capacity) { if (cpu_rq(i)->nr_running) { - if (target_util == 0 || - target_util > new_util) { - target_cpu = i; - target_util = new_util; + if(prefer_idle) { + // Find a target cpu with lowest + // utilization. + if (target_util == 0 || + target_util < new_util) { + target_cpu = i; + target_util = new_util; + } + } else { + // Find a target cpu with highest + // utilization. + if (target_util == 0 || + target_util > new_util) { + target_cpu = i; + target_util = new_util; + } } } else if (!prefer_idle) { if (best_idle_cpu < 0 || @@ -6366,6 +6378,7 @@ static inline int find_best_target(struct task_struct *p, bool prefer_idle) } } else if (backup_capacity == 0 || backup_capacity > cur_capacity) { + // Find a backup cpu with least capacity. backup_capacity = cur_capacity; backup_cpu = i; } -- GitLab From 6344206362f928e8e1223b4c7641d311095f2d7b Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 4 Aug 2016 12:20:04 +0100 Subject: [PATCH 1199/1442] ANDROID: sched/cpufreq_sched: fix thermal capping events cpufreq_sched_limits (called when CPUFREQ_GOV_LIMITS event happens) bails out if policy->rwsem is already locked. However, that rwsem is always guaranteed to be locked when we get here after a thermal throttling event happens: th_throttling -> cpufreq_update_policy() ... down_write(&policy->rwsem); ... cpufreq_set_policy() -> ... __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); -> cpufreq_sched_limits() ... if (!down_write_trylock(&policy->rwsem)) return; <-- BAIL OUT! So, we don't currently react immediately to thermal capping event (even if reaction is still quick in practice, ~1ms, as lots of events are likely to trigger a frequency selection on a high loaded system). Fix this bug by removing the bail out condition. While we are at it we also slightly change handling of the new limits by clamping the last requested_freq between policy's max and min. Doing so gives us the oppurtunity to correctly restore the last requested frequency as soon as a thermal unthrottling event happens. bug: 30481949 Change-Id: I3c13e818f238c1ffa66b34e419e8b87314b57427 Suggested-by: Javi Merino Signed-off-by: Juri Lelli Signed-off-by: Srinath Sridharan [jstultz: fwdported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- kernel/sched/cpufreq_sched.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index 83ffa79375fb..f01fab2ebe1e 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -58,7 +58,6 @@ struct gov_data { struct task_struct *task; struct irq_work irq_work; unsigned int requested_freq; - int max; }; static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, @@ -193,7 +192,7 @@ static void update_fdomain_capacity_request(int cpu) } /* Convert the new maximum capacity request into a cpu frequency */ - freq_new = capacity * gd->max >> SCHED_CAPACITY_SHIFT; + freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT; index_new = cpufreq_frequency_table_target(policy, freq_new, CPUFREQ_RELATION_L); freq_new = policy->freq_table[index_new].frequency; @@ -285,8 +284,6 @@ static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) pr_debug("%s: throttle threshold = %u [ns]\n", __func__, gd->up_throttle_nsec); - gd->max = policy->max; - rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr()); if (rc) { pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc); @@ -348,28 +345,17 @@ static int cpufreq_sched_start(struct cpufreq_policy *policy) static void cpufreq_sched_limits(struct cpufreq_policy *policy) { - struct gov_data *gd; + unsigned int clamp_freq; + struct gov_data *gd = policy->governor_data;; pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n", policy->cpu, policy->min, policy->max, policy->cur); - if (!down_write_trylock(&policy->rwsem)) - return; - /* - * Need to keep track of highest max frequency for - * capacity calculations - */ - gd = policy->governor_data; - if (gd->max < policy->max) - gd->max = policy->max; + clamp_freq = clamp(gd->requested_freq, policy->min, policy->max); - if (policy->max < policy->cur) - __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > policy->cur) - __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - - up_write(&policy->rwsem); + if (policy->cur != clamp_freq) + __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L); } static void cpufreq_sched_stop(struct cpufreq_policy *policy) -- GitLab From 6d9689e81b91b334011aafcb2b5ea0ea44fd4c1c Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Fri, 13 May 2016 11:54:04 +0100 Subject: [PATCH 1200/1442] ANDROID: sched/fair: call OPP update when going idle after migration When a task leaves a rq because it is migrated away it carries its utilization with him. In this case and OPP update on the src rq might be needed. The corresponding update at dst rq will happen at enqueue time. Change-Id: I22754a43760fc8d22a488fe15044af93787ea7a8 sched/fair: Fix uninitialised variable in idle_balance compiler warned, looks legit. Signed-off-by: Chris Redpath Signed-off-by: Andres Oportus --- kernel/sched/fair.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8ae4f6adabc9..f74550bf846a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8972,6 +8972,7 @@ static int idle_balance(struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; u64 curr_cost = 0; + long removed_util=0; /* * We must set idle_stamp _before_ calling idle_balance(), such that we @@ -8993,6 +8994,17 @@ static int idle_balance(struct rq *this_rq) raw_spin_unlock(&this_rq->lock); + /* + * If removed_util_avg is !0 we most probably migrated some task away + * from this_cpu. In this case we might be willing to trigger an OPP + * update, but we want to do so if we don't find anybody else to pull + * here (we will trigger an OPP update with the pulled task's enqueue + * anyway). + * + * Record removed_util before calling update_blocked_averages, and use + * it below (before returning) to see if an OPP update is required. + */ + removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg); update_blocked_averages(this_cpu); rcu_read_lock(); for_each_domain(this_cpu, sd) { @@ -9056,6 +9068,13 @@ static int idle_balance(struct rq *this_rq) if (pulled_task) this_rq->idle_stamp = 0; + else if (removed_util) { + /* + * No task pulled and someone has been migrated away. + * Good case to trigger an OPP update. + */ + update_capacity_of(this_cpu); + } return pulled_task; } -- GitLab From ae8edda2ff82347471cc5bc08899fa0d75c2053d Mon Sep 17 00:00:00 2001 From: Srinath Sridharan Date: Fri, 29 Jul 2016 17:50:11 +0100 Subject: [PATCH 1201/1442] ANDROID: sched/fair: Favor higher cpus only for boosted tasks This CL separates the notion of boost and prefer_idle schedtune attributes in cpu selection. Today only top-app tasks are boosted. The CPU selection is slightly tweaked such that higher order cpus are preferred only for boosted tasks (top-app) and the rest would be skewed towards lower order cpus. This avoids starvation issues for fg tasks when interacting with high priority top-app tasks (a problem often seen in the case of system_server). bug: 30245369 bug: 30292998 Change-Id: I0377e00893b9f6586eec55632a265518fd2fa8a1 Signed-off-by: Andres Oportus Conflicts: kernel/sched/fair.c --- kernel/sched/fair.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f74550bf846a..0771ad827e3b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6291,7 +6291,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return target; } -static inline int find_best_target(struct task_struct *p, bool prefer_idle) +static inline int find_best_target(struct task_struct *p, bool boosted, bool prefer_idle) { int iter_cpu; int target_cpu = -1; @@ -6309,9 +6309,9 @@ static inline int find_best_target(struct task_struct *p, bool prefer_idle) int idle_idx; /* - * favor higher cpus for tasks that prefer idle cores + * Iterate from higher cpus for boosted tasks. */ - int i = prefer_idle ? NR_CPUS-iter_cpu-1 : iter_cpu; + int i = boosted ? NR_CPUS-iter_cpu-1 : iter_cpu; if (!cpu_online(i) || !cpumask_test_cpu(i, tsk_cpus_allowed(p))) continue; @@ -6481,7 +6481,7 @@ static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync) bool boosted = 0; bool prefer_idle = 0; #endif - int tmp_target = find_best_target(p, boosted || prefer_idle); + int tmp_target = find_best_target(p, boosted, prefer_idle); if (tmp_target >= 0) { target_cpu = tmp_target; if ((boosted || prefer_idle) && idle_cpu(target_cpu)) -- GitLab From 989a768d7c0419db322c7ffab3c932a597a126af Mon Sep 17 00:00:00 2001 From: Matt Wagantall Date: Tue, 17 Jun 2014 21:43:35 -0700 Subject: [PATCH 1202/1442] ANDROID: sched/rt: print RT tasks when RT throttling is activated Existing debug prints do not provide any clues about which tasks may have triggered RT throttling. Print the names and PIDs of all tasks on the throttled rt_rq to help narrow down the source of the problem. Change-Id: I180534c8a647254ed38e89d0c981a8f8bccd741c Signed-off-by: Matt Wagantall [rameezmustafa@codeaurora.org]: Port to msm-3.18] Signed-off-by: Syed Rameez Mustafa Signed-off-by: Andres Oportus --- kernel/sched/rt.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index cc031eb193a6..06fc6c909ee9 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -899,6 +899,42 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se) return rt_task_of(rt_se)->prio; } +static void dump_throttled_rt_tasks(struct rt_rq *rt_rq) +{ + struct rt_prio_array *array = &rt_rq->active; + struct sched_rt_entity *rt_se; + char buf[500]; + char *pos = buf; + char *end = buf + sizeof(buf); + int idx; + + pos += snprintf(pos, sizeof(buf), + "sched: RT throttling activated for rt_rq %p (cpu %d)\n", + rt_rq, cpu_of(rq_of_rt_rq(rt_rq))); + + if (bitmap_empty(array->bitmap, MAX_RT_PRIO)) + goto out; + + pos += snprintf(pos, end - pos, "potential CPU hogs:\n"); + idx = sched_find_first_bit(array->bitmap); + while (idx < MAX_RT_PRIO) { + list_for_each_entry(rt_se, array->queue + idx, run_list) { + struct task_struct *p; + + if (!rt_entity_is_task(rt_se)) + continue; + + p = rt_task_of(rt_se); + if (pos < end) + pos += snprintf(pos, end - pos, "\t%s (%d)\n", + p->comm, p->pid); + } + idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1); + } +out: + printk_deferred("%s", buf); +} + static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) { u64 runtime = sched_rt_runtime(rt_rq); @@ -922,8 +958,14 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) * but accrue some time due to boosting. */ if (likely(rt_b->rt_runtime)) { + static bool once = false; + rt_rq->rt_throttled = 1; - printk_deferred_once("sched: RT throttling activated\n"); + + if (!once) { + once = true; + dump_throttled_rt_tasks(rt_rq); + } } else { /* * In case we did anyway, make it go away, -- GitLab From 345eb978a6b549ca4ba67de9a19f00b788bd0759 Mon Sep 17 00:00:00 2001 From: Matt Wagantall Date: Thu, 19 Jun 2014 14:23:33 -0700 Subject: [PATCH 1203/1442] ANDROID: sched/rt: Add Kconfig option to enable panicking for RT throttling This may be useful for detecting and debugging RT throttling issues. Change-Id: I5807a897d11997d76421c1fcaa2918aad988c6c9 Signed-off-by: Matt Wagantall [rameezmustafa@codeaurora.org]: Port to msm-3.18] Signed-off-by: Syed Rameez Mustafa [jstultz: forwardported to 4.4] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- kernel/sched/rt.c | 9 +++++++++ lib/Kconfig.debug | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 06fc6c909ee9..f8006b20d388 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -932,7 +932,16 @@ static void dump_throttled_rt_tasks(struct rt_rq *rt_rq) idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1); } out: +#ifdef CONFIG_PANIC_ON_RT_THROTTLING + /* + * Use pr_err() in the BUG() case since printk_sched() will + * not get flushed and deadlock is not a concern. + */ + pr_err("%s", buf); + BUG(); +#else printk_deferred("%s", buf); +#endif } static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1941f80206f8..8afce47c31a1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -935,6 +935,15 @@ config SCHED_INFO bool default n +config PANIC_ON_RT_THROTTLING + bool "Panic on RT throttling" + help + Say Y here to enable the kernel to panic when a realtime + runqueue is throttled. This may be useful for detecting + and debugging RT throttling issues. + + Say N if unsure. + config SCHEDSTATS bool "Collect scheduler statistics" depends on DEBUG_KERNEL && PROC_FS -- GitLab From e57a7407fcc9d460414c96cb964e45c979290dc0 Mon Sep 17 00:00:00 2001 From: Ricky Liang Date: Tue, 2 Feb 2016 01:12:06 +0800 Subject: [PATCH 1204/1442] ANDROID: FIXUP: sched: scheduler-driven cpu frequency selection Two fixups that have been reported on LKML. The next version of scheduler-driver cpu frequency selection patch set should include these fixes and we can drop this patch then. Signed-off-by: Ricky Liang Change-Id: Ia2f8b5c0dd5dac06580256eeb4b259929688af68 Signed-off-by: Andres Oportus --- kernel/sched/cpufreq_sched.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index f01fab2ebe1e..e65ff83fae06 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -131,6 +131,8 @@ static int cpufreq_sched_thread(void *data) new_request = gd->requested_freq; if (new_request == last_request) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } else { /* @@ -290,6 +292,7 @@ static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) goto err; } + policy->governor_data = gd; if (cpufreq_driver_is_slow()) { cpufreq_driver_slow = true; gd->task = kthread_create(cpufreq_sched_thread, policy, @@ -306,12 +309,12 @@ static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) init_irq_work(&gd->irq_work, cpufreq_sched_irq_work); } - policy->governor_data = gd; set_sched_freq(); return 0; err: + policy->governor_data = NULL; kfree(gd); return -ENOMEM; } -- GitLab From 69e2e59064eb409904fabbcba9acd57fc742619c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 18 Oct 2016 12:35:03 -0700 Subject: [PATCH 1205/1442] ANDROID: cgroup: Remove leftover instances of allow_attach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: kernel/sched/tune.c:718:2: error: unknown field ‘allow_attach’ specified in initializer kernel/cpuset.c:2087:2: error: unknown field 'allow_attach' specified in initializer Change-Id: Ie524350ffc6158f3182d90095cca502e58b6f197 Fixes: e78f134a78a0 ("CHROMIUM: remove Android's cgroup generic permissions checks") Signed-off-by: Guenter Roeck Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 6afc27cc722e..8102c3fdb061 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -368,12 +368,6 @@ void schedtune_enqueue_task(struct task_struct *p, int cpu) raw_spin_unlock_irqrestore(&bg->lock, irq_flags); } -int schedtune_allow_attach(struct cgroup_taskset *tset) -{ - /* We always allows tasks to be moved between existing CGroups */ - return 0; -} - int schedtune_can_attach(struct cgroup_taskset *tset) { struct task_struct *task; @@ -715,7 +709,6 @@ schedtune_css_free(struct cgroup_subsys_state *css) struct cgroup_subsys schedtune_cgrp_subsys = { .css_alloc = schedtune_css_alloc, .css_free = schedtune_css_free, -// .allow_attach = schedtune_allow_attach, .can_attach = schedtune_can_attach, .cancel_attach = schedtune_cancel_attach, .legacy_cftypes = files, -- GitLab From 751e509391349c6a4b29a935375b526420eb8ed5 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Fri, 25 Nov 2016 13:38:45 +0800 Subject: [PATCH 1206/1442] ANDROID: sched: tune: Fix lacking spinlock initialization The spinlock used by boost_groups in sched tune must be initialized. This commit fixes this lack and the following errors: [ 0.384739] c2 BUG: spinlock bad magic on CPU#2, swapper/2/0 [ 0.390313] c2 lock: 0xffffffc15fe1fc80, .magic:00000000, .owner: /-1, .owner_cpu: 0 [ 0.398739] c2 CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.4.6+ #4 [ 0.404816] c2 Hardware name: Spreadtrum SP9860gBoard (DT) [ 0.410462] c2 Call trace: [ 0.413159] c2 [] dump_backtrace+0x0/0x210 [ 0.418803] c2 [] show_stack+0x20/0x28 [ 0.424100] c2 [] dump_stack+0xa8/0xe0 [ 0.429398] c2 [] spin_dump+0x78/0x9c [ 0.434608] c2 [] spin_bug+0x30/0x3c [ 0.439644] c2 [] do_raw_spin_lock+0xac/0x1b4 [ 0.445639] c2 [] _raw_spin_lock_irqsave+0x58/0x68 [ 0.451977] c2 [] schedtune_enqueue_task+0x84/0x3bc [ 0.458320] c2 [] enqueue_task_fair+0x438/0x208c [ 0.464487] c2 [] activate_task+0x70/0xd0 [ 0.470130] c2 [] ttwu_do_activate.constprop.131+0x4c/0x98 [ 0.477079] c2 [] try_to_wake_up+0x254/0x54c [ 0.482899] c2 [] default_wake_function+0x30/0x3c [ 0.489154] c2 [] autoremove_wake_function+0x3c/0x6c [ 0.495754] c2 [] __wake_up_common+0x64/0xa4 [ 0.501574] c2 [] __wake_up+0x48/0x60 [ 0.506788] c2 [] rcu_gp_kthread_wake+0x50/0x5c [ 0.512866] c2 [] note_gp_changes+0xac/0xd4 [ 0.518597] c2 [] rcu_process_callbacks+0xe8/0x93c [ 0.524940] c2 [] __do_softirq+0x24c/0x5b8 [ 0.530584] c2 [] irq_exit+0xc0/0xec [ 0.535623] c2 [] __handle_domain_irq+0x94/0xf8 [ 0.541789] c2 [] gic_handle_irq+0x64/0xc0 Signed-off-by: Ke Wang Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 8102c3fdb061..96751f419d5b 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -725,6 +725,7 @@ schedtune_init_cgroups(void) for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); memset(bg, 0, sizeof(struct boost_groups)); + raw_spin_lock_init(&bg->lock); } pr_info("schedtune: configured to support %d boost groups\n", -- GitLab From 82ab24375a2e985b1e15088aea0009b638c47123 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Wed, 24 Aug 2016 11:02:29 +0100 Subject: [PATCH 1207/1442] ANDROID: FIXUP: sched/tune: add fixes missing from a previous patch The previous patch: e7ce26f - FIXUP: sched/tune: fix accounting for runnable tasks squashed together patches of a series to fix SchedTune's accounting issues. However, in the consolidation and cleanup of the series to merge in the Android Common Kernel, we somehow missed a couple of important changes: 1) the schedtune_exit function is not more required, because e7ce26f fixes accounting of exiting tasks in a different way 2) the schedtune_initialized flag was not set at the end of scheddtune_init_cgroup() thus failing to enabled SchedTune at boot. This patch thus is to be considered an integration of e7ce26f. Signed-off-by: Patrick Bellasi [jstultz: Cherry-picked from android-3.18. It should be noted that some of this patch was already applied in the 4.4 patches (schedtune_exit doesn't exist for example), but this patch just ensures things are totally synced up] Signed-off-by: John Stultz Signed-off-by: Andres Oportus --- kernel/sched/tune.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 96751f419d5b..d1fe64c91e52 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -730,6 +730,8 @@ schedtune_init_cgroups(void) pr_info("schedtune: configured to support %d boost groups\n", BOOSTGROUPS_COUNT); + + schedtune_initialized = true; } #else /* CONFIG_CGROUP_SCHEDTUNE */ -- GitLab From 86ee6345d46d81122f795956c875f1b8137c0769 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 15 Nov 2016 11:58:52 +0530 Subject: [PATCH 1208/1442] ANDROID: cpufreq: sched: Fix kernel crash on accessing sysfs file If the cpufreq driver hasn't set the CPUFREQ_HAVE_GOVERNOR_PER_POLICY flag, then the kernel will crash on accessing sysfs files for the sched governor. CPUFreq governors we can have the governor specific sysfs files in two places: A. /sys/devices/system/cpu/cpuX/cpufreq/ B. /sys/devices/system/cpu/cpufreq/ The case A. is for governor per policy case, where we can control the governor tunables for each policy separately. The case B. is for system wide tunable values. The schedfreq governor only implements the case A. and not B. The sysfs files in case B will still be present in /sys/devices/system/cpu/cpufreq/, but accessing them will crash kernel as the governor doesn't support that. Moreover the sched governor is pretty new and will be used only for the ARM platforms and there is no need to support the case B at all. Hence use policy->kobj instead of get_governor_parent_kobj(), so that we always create the sysfs files in path A. Signed-off-by: Viresh Kumar Signed-off-by: Andres Oportus --- kernel/sched/cpufreq_sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index e65ff83fae06..1d471d533414 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -286,7 +286,7 @@ static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) pr_debug("%s: throttle threshold = %u [ns]\n", __func__, gd->up_throttle_nsec); - rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr()); + rc = sysfs_create_group(&policy->kobj, get_sysfs_attr()); if (rc) { pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc); goto err; @@ -329,7 +329,7 @@ static void cpufreq_sched_policy_exit(struct cpufreq_policy *policy) put_task_struct(gd->task); } - sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr()); + sysfs_remove_group(&policy->kobj, get_sysfs_attr()); policy->governor_data = NULL; -- GitLab From 262200a0abcf02a5e65cbcef99355b80c72c7fcf Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 14 Nov 2014 16:25:50 +0000 Subject: [PATCH 1209/1442] ANDROID: sched/debug: Add energy procfs interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch makes the energy data available via procfs. The related files are placed as sub-directory named 'energy' inside the /proc/sys/kernel/sched_domain/cpuX/domainY/groupZ directory for those cpu/domain/group tuples which have energy information. The following example depicts the contents of /proc/sys/kernel/sched_domain/cpu0/domain0/group[01] for a system which has energy information attached to domain level 0. ├── cpu0 │ ├── domain0 │ │ ├── busy_factor │ │ ├── busy_idx │ │ ├── cache_nice_tries │ │ ├── flags │ │ ├── forkexec_idx │ │ ├── group0 │ │ │ └── energy │ │ │ ├── cap_states │ │ │ ├── idle_states │ │ │ ├── nr_cap_states │ │ │ └── nr_idle_states │ │ ├── group1 │ │ │ └── energy │ │ │ ├── cap_states │ │ │ ├── idle_states │ │ │ ├── nr_cap_states │ │ │ └── nr_idle_states │ │ ├── idle_idx │ │ ├── imbalance_pct │ │ ├── max_interval │ │ ├── max_newidle_lb_cost │ │ ├── min_interval │ │ ├── name │ │ ├── newidle_idx │ │ └── wake_idx │ └── domain1 │ ├── busy_factor │ ├── busy_idx │ ├── cache_nice_tries │ ├── flags │ ├── forkexec_idx │ ├── idle_idx │ ├── imbalance_pct │ ├── max_interval │ ├── max_newidle_lb_cost │ ├── min_interval │ ├── name │ ├── newidle_idx │ └── wake_idx The files 'nr_idle_states' and 'nr_cap_states' contain a scalar value whereas 'idle_states' and 'cap_states' contain a vector of power consumption at this idle state respectively (compute capacity, power consumption) at this capacity state. Signed-off-by: Dietmar Eggemann --- kernel/sched/debug.c | 67 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index fa178b62ea79..48a621c257d1 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -260,10 +260,61 @@ set_table_entry(struct ctl_table *entry, } } +static struct ctl_table * +sd_alloc_ctl_energy_table(struct sched_group_energy *sge) +{ + struct ctl_table *table = sd_alloc_ctl_entry(5); + + if (table == NULL) + return NULL; + + set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states, + sizeof(int), 0644, proc_dointvec_minmax, false); + set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power, + sge->nr_idle_states*sizeof(struct idle_state), 0644, + proc_doulongvec_minmax, false); + set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states, + sizeof(int), 0644, proc_dointvec_minmax, false); + set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap, + sge->nr_cap_states*sizeof(struct capacity_state), 0644, + proc_doulongvec_minmax, false); + + return table; +} + +static struct ctl_table * +sd_alloc_ctl_group_table(struct sched_group *sg) +{ + struct ctl_table *table = sd_alloc_ctl_entry(2); + + if (table == NULL) + return NULL; + + table->procname = kstrdup("energy", GFP_KERNEL); + table->mode = 0555; + table->child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge); + + return table; +} + static struct ctl_table * sd_alloc_ctl_domain_table(struct sched_domain *sd) { - struct ctl_table *table = sd_alloc_ctl_entry(14); + struct ctl_table *table; + unsigned int nr_entries = 14; + + int i = 0; + struct sched_group *sg = sd->groups; + + if (sg->sge) { + int nr_sgs = 0; + + do {} while (nr_sgs++, sg = sg->next, sg != sd->groups); + + nr_entries += nr_sgs; + } + + table = sd_alloc_ctl_entry(nr_entries); if (table == NULL) return NULL; @@ -296,7 +347,19 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) sizeof(long), 0644, proc_doulongvec_minmax, false); set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false); - /* &table[13] is terminator */ + sg = sd->groups; + if (sg->sge) { + char buf[32]; + struct ctl_table *entry = &table[13]; + + do { + snprintf(buf, 32, "group%d", i); + entry->procname = kstrdup(buf, GFP_KERNEL); + entry->mode = 0555; + entry->child = sd_alloc_ctl_group_table(sg); + } while (entry++, i++, sg = sg->next, sg != sd->groups); + } + /* &table[nr_entries-1] is terminator */ return table; } -- GitLab From 2b314625ab50c6866b11331dcad662c7ac40c05c Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 13 Apr 2016 16:38:34 -0700 Subject: [PATCH 1210/1442] ANDROID: sdcardfs: override umask on mkdir and create The mode on files created on the lower fs should not be affected by the umask of the calling task's fs_struct. Instead, we create a copy and modify it as needed. This also lets us avoid the string shenanigans around .nomedia files. Bug: 27992761 Change-Id: Ia3a6e56c24c6e19b3b01c1827e46403bb71c2f4c Signed-off-by: Daniel Rosenberg --- fs/fs_struct.c | 1 + fs/sdcardfs/inode.c | 70 ++++++++++++++++++++++----------------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 7dca743b2ce1..005dcb401369 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -127,6 +127,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) } return fs; } +EXPORT_SYMBOL_GPL(copy_fs_struct); int unshare_fs_struct(void) { diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 2528da0d3ae1..4b140ba86955 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -19,6 +19,7 @@ */ #include "sdcardfs.h" +#include /* Do not directly use this function. Use OVERRIDE_CRED() instead. */ const struct cred * override_fsids(struct sdcardfs_sb_info* sbi) @@ -56,6 +57,8 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, struct dentry *lower_parent_dentry = NULL; struct path lower_path; const struct cred *saved_cred = NULL; + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" @@ -74,6 +77,16 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, /* set last 16bytes of mode field to 0664 */ mode = (mode & S_IFMT) | 00664; + + /* temporarily change umask for lower fs write */ + saved_fs = current->fs; + copied_fs = copy_fs_struct(current->fs); + if (!copied_fs) { + err = -ENOMEM; + goto out_unlock; + } + current->fs = copied_fs; + current->fs->umask = 0; err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) goto out; @@ -85,6 +98,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); out: + current->fs = saved_fs; + free_fs_struct(copied_fs); +out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); REVERT_CRED(saved_cred); @@ -245,11 +261,9 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; struct sdcardfs_inode_info *pi = SDCARDFS_I(dir); - char *page_buf; - char *nomedia_dir_name; - char *nomedia_fullpath; - int fullpath_namelen; int touch_err = 0; + struct fs_struct *saved_fs; + struct fs_struct *copied_fs; if(!check_caller_access_to_name(dir, dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" @@ -276,6 +290,16 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* set last 16bytes of mode field to 0775 */ mode = (mode & S_IFMT) | 00775; + + /* temporarily change umask for lower fs write */ + saved_fs = current->fs; + copied_fs = copy_fs_struct(current->fs); + if (!copied_fs) { + err = -ENOMEM; + goto out_unlock; + } + current->fs = copied_fs; + current->fs->umask = 0; err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode); if (err) @@ -316,42 +340,18 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* When creating /Android/data and /Android/obb, mark them as .nomedia */ if (make_nomedia_in_obb || ((pi->perm == PERM_ANDROID) && (!strcasecmp(dentry->d_name.name, "data")))) { - - page_buf = (char *)__get_free_page(GFP_KERNEL); - if (!page_buf) { - printk(KERN_ERR "sdcardfs: failed to allocate page buf\n"); - goto out; - } - - nomedia_dir_name = d_absolute_path(&lower_path, page_buf, PAGE_SIZE); - if (IS_ERR(nomedia_dir_name)) { - free_page((unsigned long)page_buf); - printk(KERN_ERR "sdcardfs: failed to get .nomedia dir name\n"); - goto out; - } - - fullpath_namelen = page_buf + PAGE_SIZE - nomedia_dir_name - 1; - fullpath_namelen += strlen("/.nomedia"); - nomedia_fullpath = kzalloc(fullpath_namelen + 1, GFP_KERNEL); - if (!nomedia_fullpath) { - free_page((unsigned long)page_buf); - printk(KERN_ERR "sdcardfs: failed to allocate .nomedia fullpath buf\n"); - goto out; - } - - strcpy(nomedia_fullpath, nomedia_dir_name); - free_page((unsigned long)page_buf); - strcat(nomedia_fullpath, "/.nomedia"); - touch_err = touch(nomedia_fullpath, 0664); + set_fs_pwd(current->fs, &lower_path); + touch_err = touch(".nomedia", 0664); if (touch_err) { - printk(KERN_ERR "sdcardfs: failed to touch(%s): %d\n", - nomedia_fullpath, touch_err); - kfree(nomedia_fullpath); + printk(KERN_ERR "sdcardfs: failed to create .nomedia in %s: %d\n", + lower_path.dentry->d_name.name, touch_err); goto out; } - kfree(nomedia_fullpath); } out: + current->fs = saved_fs; + free_fs_struct(copied_fs); +out_unlock: unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); out_revert: -- GitLab From 7c4dcd2b79c3522ecb5ef972489c246eff17c1e1 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 27 Apr 2016 15:31:29 -0700 Subject: [PATCH 1211/1442] ANDROID: sdcardfs: Check for other cases on path lookup This fixes a bug where the first lookup of a file or folder created under a different view would not be case insensitive. It will now search through for a case insensitive match if the initial lookup fails. Bug:28024488 Change-Id: I4ff9ce297b9f2f9864b47540e740fd491c545229 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/lookup.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index a01b06a514fd..a127d05b5054 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -240,6 +240,28 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, /* Use vfs_path_lookup to check if the dentry exists or not */ err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, &lower_path); + /* check for other cases */ + if (err == -ENOENT) { + struct dentry *child; + struct dentry *match = NULL; + spin_lock(&lower_dir_dentry->d_lock); + list_for_each_entry(child, &lower_dir_dentry->d_subdirs, d_child) { + if (child && d_inode(child)) { + if (strcasecmp(child->d_name.name, name)==0) { + match = dget(child); + break; + } + } + } + spin_unlock(&lower_dir_dentry->d_lock); + if (match) { + err = vfs_path_lookup(lower_dir_dentry, + lower_dir_mnt, + match->d_name.name, 0, + &lower_path); + dput(match); + } + } /* no error: handle positive dentries */ if (!err) { -- GitLab From cb1b9458255951511874693e76560c64b8269c63 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 16 Aug 2016 15:19:26 -0700 Subject: [PATCH 1212/1442] ANDROID: sdcardfs: Fix locking for permission fix up Iterating over d_subdirs requires taking d_lock. Removed several unneeded locks. Change-Id: I5b1588e54c7e6ee19b756d6705171c7f829e2650 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/derived_perm.c | 6 ++---- fs/sdcardfs/inode.c | 2 -- fs/sdcardfs/lookup.c | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 41e0e11b3c35..bfe402b8cf32 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -111,15 +111,15 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry) void get_derive_permissions_recursive(struct dentry *parent) { struct dentry *dentry; + spin_lock(&parent->d_lock); list_for_each_entry(dentry, &parent->d_subdirs, d_child) { if (dentry->d_inode) { - mutex_lock(&dentry->d_inode->i_mutex); get_derived_permission(parent, dentry); fix_derived_permission(dentry->d_inode); get_derive_permissions_recursive(dentry); - mutex_unlock(&dentry->d_inode->i_mutex); } } + spin_unlock(&parent->d_lock); } /* main function for updating derived permission */ @@ -135,7 +135,6 @@ inline void update_derived_permission_lock(struct dentry *dentry) * 1. need to check whether the dentry is updated or not * 2. remove the root dentry update */ - mutex_lock(&dentry->d_inode->i_mutex); if(IS_ROOT(dentry)) { //setup_default_pre_root_state(dentry->d_inode); } else { @@ -146,7 +145,6 @@ inline void update_derived_permission_lock(struct dentry *dentry) } } fix_derived_permission(dentry->d_inode); - mutex_unlock(&dentry->d_inode->i_mutex); } int need_graft_path(struct dentry *dentry) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 4b140ba86955..1a23c0cc8f58 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -513,11 +513,9 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, } /* At this point, not all dentry information has been moved, so * we pass along new_dentry for the name.*/ - mutex_lock(&d_inode(old_dentry)->i_mutex); get_derived_permission_new(new_dentry->d_parent, old_dentry, new_dentry); fix_derived_permission(d_inode(old_dentry)); get_derive_permissions_recursive(old_dentry); - mutex_unlock(&d_inode(old_dentry)->i_mutex); out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index a127d05b5054..c74a7d1bc18e 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -387,11 +387,9 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_inode) { fsstack_copy_attr_times(dentry->d_inode, sdcardfs_lower_inode(dentry->d_inode)); - /* get drived permission */ - mutex_lock(&dentry->d_inode->i_mutex); + /* get derived permission */ get_derived_permission(parent, dentry); fix_derived_permission(dentry->d_inode); - mutex_unlock(&dentry->d_inode->i_mutex); } /* update parent directory's atime */ fsstack_copy_attr_atime(parent->d_inode, -- GitLab From fbd34b6d9ab065d10590e1829bb93685c3f45348 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 10 May 2016 13:42:43 -0700 Subject: [PATCH 1213/1442] ANDROID: sdcardfs: Switch package list to RCU Switched the package id hashmap to use RCU. Change-Id: I9fdcab279009005bf28536247d11e13babab0b93 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/derived_perm.c | 3 +- fs/sdcardfs/packagelist.c | 200 +++++++++++++++++-------------------- fs/sdcardfs/sdcardfs.h | 2 +- 3 files changed, 93 insertions(+), 112 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index bfe402b8cf32..2a75ad873a7c 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -47,7 +47,6 @@ void setup_derived_state(struct inode *inode, perm_t perm, /* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry) { - struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode); struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); appid_t appid; @@ -96,7 +95,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, st case PERM_ANDROID_DATA: case PERM_ANDROID_OBB: case PERM_ANDROID_MEDIA: - appid = get_appid(sbi->pkgl_id, newdentry->d_name.name); + appid = get_appid(newdentry->d_name.name); if (appid != 0) { info->d_uid = multiuser_get_uid(parent_info->userid, appid); } diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index 9c3340528eee..f5a49c513568 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -29,26 +29,13 @@ #include -#define STRING_BUF_SIZE (512) - struct hashtable_entry { struct hlist_node hlist; - void *key; - unsigned int value; -}; - -struct sb_list { - struct super_block *sb; - struct list_head list; + const char *key; + atomic_t value; }; -struct packagelist_data { - DECLARE_HASHTABLE(package_to_appid,8); - struct mutex hashtable_lock; - -}; - -static struct packagelist_data *pkgl_data_all; +static DEFINE_HASHTABLE(package_to_appid, 8); static struct kmem_cache *hashtable_entry_cachep; @@ -64,22 +51,21 @@ static unsigned int str_hash(const char *key) { return h; } -appid_t get_appid(void *pkgl_id, const char *app_name) +appid_t get_appid(const char *app_name) { - struct packagelist_data *pkgl_dat = pkgl_data_all; struct hashtable_entry *hash_cur; unsigned int hash = str_hash(app_name); appid_t ret_id; - mutex_lock(&pkgl_dat->hashtable_lock); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { + rcu_read_lock(); + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(app_name, hash_cur->key)) { - ret_id = (appid_t)hash_cur->value; - mutex_unlock(&pkgl_dat->hashtable_lock); + ret_id = atomic_read(&hash_cur->value); + rcu_read_unlock(); return ret_id; } } - mutex_unlock(&pkgl_dat->hashtable_lock); + rcu_read_unlock(); return 0; } @@ -120,116 +106,118 @@ int open_flags_to_access_mode(int open_flags) { } } -static int insert_str_to_int_lock(struct packagelist_data *pkgl_dat, char *key, - unsigned int value) +static struct hashtable_entry *alloc_packagelist_entry(const char *key, + appid_t value) +{ + struct hashtable_entry *ret = kmem_cache_alloc(hashtable_entry_cachep, + GFP_KERNEL); + if (!ret) + return NULL; + + ret->key = kstrdup(key, GFP_KERNEL); + if (!ret->key) { + kmem_cache_free(hashtable_entry_cachep, ret); + return NULL; + } + + atomic_set(&ret->value, value); + return ret; +} + +static int insert_packagelist_entry_locked(const char *key, appid_t value) { struct hashtable_entry *hash_cur; struct hashtable_entry *new_entry; unsigned int hash = str_hash(key); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(key, hash_cur->key)) { - hash_cur->value = value; + atomic_set(&hash_cur->value, value); return 0; } } - new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); + new_entry = alloc_packagelist_entry(key, value); if (!new_entry) return -ENOMEM; - new_entry->key = kstrdup(key, GFP_KERNEL); - new_entry->value = value; - hash_add(pkgl_dat->package_to_appid, &new_entry->hlist, hash); + hash_add_rcu(package_to_appid, &new_entry->hlist, hash); return 0; } static void fixup_perms(struct super_block *sb) { if (sb && sb->s_magic == SDCARDFS_SUPER_MAGIC) { - mutex_lock(&sb->s_root->d_inode->i_mutex); get_derive_permissions_recursive(sb->s_root); - mutex_unlock(&sb->s_root->d_inode->i_mutex); } } -static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, - unsigned int value) { - int ret; +static void fixup_all_perms(void) +{ struct sdcardfs_sb_info *sbinfo; - mutex_lock(&sdcardfs_super_list_lock); - mutex_lock(&pkgl_dat->hashtable_lock); - ret = insert_str_to_int_lock(pkgl_dat, key, value); - mutex_unlock(&pkgl_dat->hashtable_lock); - - list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { - if (sbinfo) { + list_for_each_entry(sbinfo, &sdcardfs_super_list, list) + if (sbinfo) fixup_perms(sbinfo->sb); - } - } +} + +static int insert_packagelist_entry(const char *key, appid_t value) +{ + int err; + + mutex_lock(&sdcardfs_super_list_lock); + err = insert_packagelist_entry_locked(key, value); + if (!err) + fixup_all_perms(); mutex_unlock(&sdcardfs_super_list_lock); - return ret; + + return err; } -static void remove_str_to_int_lock(struct hashtable_entry *h_entry) { - kfree(h_entry->key); - hash_del(&h_entry->hlist); - kmem_cache_free(hashtable_entry_cachep, h_entry); +static void free_packagelist_entry(struct hashtable_entry *entry) +{ + kfree(entry->key); + hash_del_rcu(&entry->hlist); + kmem_cache_free(hashtable_entry_cachep, entry); } -static void remove_str_to_int(struct packagelist_data *pkgl_dat, const char *key) +static void remove_packagelist_entry_locked(const char *key) { - struct sdcardfs_sb_info *sbinfo; struct hashtable_entry *hash_cur; unsigned int hash = str_hash(key); - mutex_lock(&sdcardfs_super_list_lock); - mutex_lock(&pkgl_dat->hashtable_lock); - hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { + + hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) { if (!strcasecmp(key, hash_cur->key)) { - remove_str_to_int_lock(hash_cur); - break; - } - } - mutex_unlock(&pkgl_dat->hashtable_lock); - list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { - if (sbinfo) { - fixup_perms(sbinfo->sb); + hash_del_rcu(&hash_cur->hlist); + synchronize_rcu(); + free_packagelist_entry(hash_cur); + return; } } +} + +static void remove_packagelist_entry(const char *key) +{ + mutex_lock(&sdcardfs_super_list_lock); + remove_packagelist_entry_locked(key); + fixup_all_perms(); mutex_unlock(&sdcardfs_super_list_lock); return; } -static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) +static void packagelist_destroy(void) { struct hashtable_entry *hash_cur; struct hlist_node *h_t; + HLIST_HEAD(free_list); int i; - mutex_lock(&pkgl_dat->hashtable_lock); - hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist) - remove_str_to_int_lock(hash_cur); - mutex_unlock(&pkgl_dat->hashtable_lock); - hash_init(pkgl_dat->package_to_appid); -} - -static struct packagelist_data * packagelist_create(void) -{ - struct packagelist_data *pkgl_dat; + mutex_lock(&sdcardfs_super_list_lock); + hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) { + hash_del_rcu(&hash_cur->hlist); + hlist_add_head(&hash_cur->hlist, &free_list); - pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO); - if (!pkgl_dat) { - printk(KERN_ERR "sdcardfs: Failed to create hash\n"); - return ERR_PTR(-ENOMEM); } - - mutex_init(&pkgl_dat->hashtable_lock); - hash_init(pkgl_dat->package_to_appid); - - return pkgl_dat; -} - -static void packagelist_destroy(struct packagelist_data *pkgl_dat) -{ - remove_all_hashentrys(pkgl_dat); + synchronize_rcu(); + hlist_for_each_entry_safe(hash_cur, h_t, &free_list, hlist) + free_packagelist_entry(hash_cur); + mutex_unlock(&sdcardfs_super_list_lock); printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld\n"); - kfree(pkgl_dat); } struct package_appid { @@ -245,26 +233,21 @@ static inline struct package_appid *to_package_appid(struct config_item *item) static ssize_t package_appid_attr_show(struct config_item *item, char *page) { - ssize_t count; - count = sprintf(page, "%d\n", get_appid(pkgl_data_all, item->ci_name)); - return count; + return scnprintf(page, PAGE_SIZE, "%u\n", get_appid(item->ci_name)); } static ssize_t package_appid_attr_store(struct config_item *item, const char *page, size_t count) { struct package_appid *package_appid = to_package_appid(item); - unsigned long tmp; - char *p = (char *) page; + unsigned int tmp; int ret; - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; + ret = kstrtouint(page, 10, &tmp); + if (ret) + return ret; - if (tmp > INT_MAX) - return -ERANGE; - ret = insert_str_to_int(pkgl_data_all, item->ci_name, (unsigned int)tmp); + ret = insert_packagelist_entry(item->ci_name, tmp); package_appid->add_pid = tmp; if (ret) return ret; @@ -289,7 +272,7 @@ static void package_appid_release(struct config_item *item) { printk(KERN_INFO "sdcardfs: removing %s\n", item->ci_dentry->d_name.name); /* item->ci_name is freed already, so we rely on the dentry */ - remove_str_to_int(pkgl_data_all, item->ci_dentry->d_name.name); + remove_packagelist_entry(item->ci_dentry->d_name.name); kfree(to_package_appid(item)); } @@ -333,21 +316,21 @@ static ssize_t packages_attr_show(struct config_item *item, char *page) { struct hashtable_entry *hash_cur; - struct hlist_node *h_t; int i; int count = 0, written = 0; - char errormsg[] = "\n"; + const char errormsg[] = "\n"; - mutex_lock(&pkgl_data_all->hashtable_lock); - hash_for_each_safe(pkgl_data_all->package_to_appid, i, h_t, hash_cur, hlist) { - written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n", (char *)hash_cur->key, hash_cur->value); + rcu_read_lock(); + hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) { + written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n", + (const char *)hash_cur->key, atomic_read(&hash_cur->value)); if (count + written == PAGE_SIZE - sizeof(errormsg)) { count += scnprintf(page + count, PAGE_SIZE - count, errormsg); break; } count += written; } - mutex_unlock(&pkgl_data_all->hashtable_lock); + rcu_read_unlock(); return count; } @@ -430,7 +413,6 @@ int packagelist_init(void) return -ENOMEM; } - pkgl_data_all = packagelist_create(); configfs_sdcardfs_init(); return 0; } @@ -438,7 +420,7 @@ int packagelist_init(void) void packagelist_exit(void) { configfs_sdcardfs_exit(); - packagelist_destroy(pkgl_data_all); + packagelist_destroy(); if (hashtable_entry_cachep) kmem_cache_destroy(hashtable_entry_cachep); } diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index f111f898b630..75284f339ae0 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -396,7 +396,7 @@ extern struct mutex sdcardfs_super_list_lock; extern struct list_head sdcardfs_super_list; /* for packagelist.c */ -extern appid_t get_appid(void *pkgl_id, const char *app_name); +extern appid_t get_appid(const char *app_name); extern int check_caller_access_to_name(struct inode *parent_node, const char* name); extern int open_flags_to_access_mode(int open_flags); extern int packagelist_init(void); -- GitLab From 5080d2476db2052f3574fa32f658a1b6de6b1b2e Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 18 May 2016 16:57:10 -0700 Subject: [PATCH 1214/1442] ANDROID: sdcardfs: Added top to sdcardfs_inode_info Adding packages to the package list and moving files takes a large amount of locks, and is currently a heavy operation. This adds a 'top' field to the inode_info, which points to the inode for the top most directory whose owner you would like to match. On permission checks and get_attr, we look up the owner based on the information at top. When we change a package mapping, we need only modify the information in the corresponding top inode_info's. When renaming, we must ensure top is set correctly in all children. This happens when an app specific folder gets moved outside of the folder for that app. Change-Id: Ib749c60b568e9a45a46f8ceed985c1338246ec6c Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/derived_perm.c | 73 ++++++++++++++++++++++++++++++++++---- fs/sdcardfs/inode.c | 45 +++++++++++++++++++---- fs/sdcardfs/main.c | 4 +-- fs/sdcardfs/packagelist.c | 12 +++---- fs/sdcardfs/sdcardfs.h | 40 ++++++++++++++++++--- fs/sdcardfs/super.c | 1 + 6 files changed, 149 insertions(+), 26 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 2a75ad873a7c..89daf69efbaa 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -30,11 +30,12 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) ci->userid = pi->userid; ci->d_uid = pi->d_uid; ci->under_android = pi->under_android; + set_top(ci, pi->top); } /* helper function for derived state */ -void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, bool under_android) +void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, + uid_t uid, bool under_android, struct inode *top) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); @@ -42,6 +43,7 @@ void setup_derived_state(struct inode *inode, perm_t perm, info->userid = userid; info->d_uid = uid; info->under_android = under_android; + set_top(info, top); } /* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ @@ -70,6 +72,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, st /* Legacy internal layout places users at top level */ info->perm = PERM_ROOT; info->userid = simple_strtoul(newdentry->d_name.name, NULL, 10); + set_top(info, &info->vfs_inode); break; case PERM_ROOT: /* Assume masked off by default. */ @@ -77,19 +80,23 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, st /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID; info->under_android = true; + set_top(info, &info->vfs_inode); } break; case PERM_ANDROID: if (!strcasecmp(newdentry->d_name.name, "data")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_DATA; + set_top(info, &info->vfs_inode); } else if (!strcasecmp(newdentry->d_name.name, "obb")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_OBB; + set_top(info, &info->vfs_inode); /* Single OBB directory is always shared */ } else if (!strcasecmp(newdentry->d_name.name, "media")) { /* App-specific directories inside; let anyone traverse */ info->perm = PERM_ANDROID_MEDIA; + set_top(info, &info->vfs_inode); } break; case PERM_ANDROID_DATA: @@ -99,6 +106,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, st if (appid != 0) { info->d_uid = multiuser_get_uid(parent_info->userid, appid); } + set_top(info, &info->vfs_inode); break; } } @@ -108,14 +116,65 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry) get_derived_permission_new(parent, dentry, dentry); } -void get_derive_permissions_recursive(struct dentry *parent) { +static int descendant_may_need_fixup(perm_t perm) { + if (perm == PERM_PRE_ROOT || perm == PERM_ROOT || perm == PERM_ANDROID) + return 1; + return 0; +} + +static int needs_fixup(perm_t perm) { + if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB + || perm == PERM_ANDROID_MEDIA) + return 1; + return 0; +} + +void fixup_perms_recursive(struct dentry *dentry, const char* name, size_t len) { + struct dentry *child; + struct sdcardfs_inode_info *info; + if (!dget(dentry)) + return; + if (!dentry->d_inode) { + dput(dentry); + return; + } + info = SDCARDFS_I(d_inode(dentry)); + + if (needs_fixup(info->perm)) { + mutex_lock(&d_inode(dentry)->i_mutex); + child = lookup_one_len(name, dentry, len); + mutex_unlock(&d_inode(dentry)->i_mutex); + if (!IS_ERR(child)) { + if (child->d_inode) { + get_derived_permission(dentry, child); + fix_derived_permission(d_inode(child)); + } + dput(child); + } + } else if (descendant_may_need_fixup(info->perm)) { + mutex_lock(&d_inode(dentry)->i_mutex); + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + fixup_perms_recursive(child, name, len); + } + mutex_unlock(&d_inode(dentry)->i_mutex); + } + dput(dentry); +} + +void fixup_top_recursive(struct dentry *parent) { struct dentry *dentry; + struct sdcardfs_inode_info *info; + if (!d_inode(parent)) + return; + info = SDCARDFS_I(d_inode(parent)); spin_lock(&parent->d_lock); list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (dentry->d_inode) { - get_derived_permission(parent, dentry); - fix_derived_permission(dentry->d_inode); - get_derive_permissions_recursive(dentry); + if (d_inode(dentry)) { + if (SDCARDFS_I(d_inode(parent))->top != SDCARDFS_I(d_inode(dentry))->top) { + get_derived_permission(parent, dentry); + fix_derived_permission(d_inode(dentry)); + fixup_top_recursive(dentry); + } } } spin_unlock(&parent->d_lock); diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 1a23c0cc8f58..67bcee2c379a 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -515,7 +515,7 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, * we pass along new_dentry for the name.*/ get_derived_permission_new(new_dentry->d_parent, old_dentry, new_dentry); fix_derived_permission(d_inode(old_dentry)); - get_derive_permissions_recursive(old_dentry); + fixup_top_recursive(old_dentry); out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); @@ -587,6 +587,16 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) static int sdcardfs_permission(struct inode *inode, int mask) { int err; + struct inode *top = grab_top(SDCARDFS_I(inode)); + + if (!top) + return -EINVAL; + /* Ensure owner is up to date */ + if (!uid_eq(inode->i_uid, top->i_uid)) { + SDCARDFS_I(inode)->d_uid = SDCARDFS_I(top)->d_uid; + fix_derived_permission(inode); + } + release_top(SDCARDFS_I(inode)); /* * Permission check on sdcardfs inode. @@ -725,6 +735,30 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) return err; } +static int sdcardfs_fillattr(struct inode *inode, struct kstat *stat) +{ + struct sdcardfs_inode_info *info = SDCARDFS_I(inode); + struct inode *top = grab_top(info); + if (!top) + return -EINVAL; + + stat->dev = inode->i_sb->s_dev; + stat->ino = inode->i_ino; + stat->mode = (inode->i_mode & S_IFMT) | get_mode(SDCARDFS_I(top)); + stat->nlink = inode->i_nlink; + stat->uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); + stat->gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(top))); + stat->rdev = inode->i_rdev; + stat->size = i_size_read(inode); + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; + stat->blksize = (1 << inode->i_blkbits); + stat->blocks = inode->i_blocks; + release_top(info); + return 0; +} + static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -733,6 +767,7 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *lower_inode; struct path lower_path; struct dentry *parent; + int err; parent = dget_parent(dentry); if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { @@ -750,14 +785,12 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, lower_dentry = lower_path.dentry; lower_inode = sdcardfs_lower_inode(inode); - sdcardfs_copy_and_fix_attrs(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode); - - generic_fillattr(inode, stat); + err = sdcardfs_fillattr(inode, stat); sdcardfs_put_lower_path(dentry, &lower_path); - return 0; + return err; } const struct inode_operations sdcardfs_symlink_iops = { @@ -775,9 +808,7 @@ const struct inode_operations sdcardfs_symlink_iops = { const struct inode_operations sdcardfs_dir_iops = { .create = sdcardfs_create, .lookup = sdcardfs_lookup, -#if 0 .permission = sdcardfs_permission, -#endif .unlink = sdcardfs_unlink, .mkdir = sdcardfs_mkdir, .rmdir = sdcardfs_rmdir, diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index a6522286d731..6d526bf3d956 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -268,13 +268,13 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); mutex_lock(&sdcardfs_super_list_lock); if(sb_info->options.multiuser) { - setup_derived_state(sb->s_root->d_inode, PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false); + setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); /*err = prepare_dir(sb_info->obbpath_s, sb_info->options.fs_low_uid, sb_info->options.fs_low_gid, 00755);*/ } else { - setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false); + setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false, sb->s_root->d_inode); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fix_derived_permission(sb->s_root->d_inode); diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index f5a49c513568..03776fa5f26c 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -143,18 +143,18 @@ static int insert_packagelist_entry_locked(const char *key, appid_t value) return 0; } -static void fixup_perms(struct super_block *sb) { +static void fixup_perms(struct super_block *sb, const char *key) { if (sb && sb->s_magic == SDCARDFS_SUPER_MAGIC) { - get_derive_permissions_recursive(sb->s_root); + fixup_perms_recursive(sb->s_root, key, strlen(key)); } } -static void fixup_all_perms(void) +static void fixup_all_perms(const char *key) { struct sdcardfs_sb_info *sbinfo; list_for_each_entry(sbinfo, &sdcardfs_super_list, list) if (sbinfo) - fixup_perms(sbinfo->sb); + fixup_perms(sbinfo->sb, key); } static int insert_packagelist_entry(const char *key, appid_t value) @@ -164,7 +164,7 @@ static int insert_packagelist_entry(const char *key, appid_t value) mutex_lock(&sdcardfs_super_list_lock); err = insert_packagelist_entry_locked(key, value); if (!err) - fixup_all_perms(); + fixup_all_perms(key); mutex_unlock(&sdcardfs_super_list_lock); return err; @@ -196,7 +196,7 @@ static void remove_packagelist_entry(const char *key) { mutex_lock(&sdcardfs_super_list_lock); remove_packagelist_entry_locked(key); - fixup_all_perms(); + fixup_all_perms(key); mutex_unlock(&sdcardfs_super_list_lock); return; } diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 75284f339ae0..cfda98d257b6 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -169,6 +169,8 @@ struct sdcardfs_inode_info { userid_t userid; uid_t d_uid; bool under_android; + /* top folder for ownership */ + struct inode *top; struct inode vfs_inode; }; @@ -321,6 +323,35 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ SDCARDFS_DENT_FUNC(lower_path) SDCARDFS_DENT_FUNC(orig_path) +/* grab a refererence if we aren't linking to ourself */ +static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top) +{ + struct inode *old_top = NULL; + BUG_ON(IS_ERR_OR_NULL(top)); + if (info->top && info->top != &info->vfs_inode) { + old_top = info->top; + } + if (top != &info->vfs_inode) + igrab(top); + info->top = top; + iput(old_top); +} + +static inline struct inode *grab_top(struct sdcardfs_inode_info *info) +{ + struct inode *top = info->top; + if (top) { + return igrab(top); + } else { + return NULL; + } +} + +static inline void release_top(struct sdcardfs_inode_info *info) +{ + iput(info->top); +} + static inline int get_gid(struct sdcardfs_inode_info *info) { struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); if (sb_info->options.gid == AID_SDCARD_RW) { @@ -337,7 +368,7 @@ static inline int get_gid(struct sdcardfs_inode_info *info) { static inline int get_mode(struct sdcardfs_inode_info *info) { int owner_mode; int filtered_mode; - struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); + struct sdcardfs_sb_info * sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); int visible_mode = 0775 & ~sb_info->options.mask; if (info->perm == PERM_PRE_ROOT) { @@ -403,11 +434,12 @@ extern int packagelist_init(void); extern void packagelist_exit(void); /* for derived_perm.c */ -extern void setup_derived_state(struct inode *inode, perm_t perm, - userid_t userid, uid_t uid, bool under_android); +extern void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, + uid_t uid, bool under_android, struct inode *top); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry); -extern void get_derive_permissions_recursive(struct dentry *parent); +extern void fixup_top_recursive(struct dentry *parent); +extern void fixup_perms_recursive(struct dentry *dentry, const char *name, size_t len); extern void update_derived_permission_lock(struct dentry *dentry); extern int need_graft_path(struct dentry *dentry); diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index 1d6490128c99..0a465395aab7 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -126,6 +126,7 @@ static void sdcardfs_evict_inode(struct inode *inode) */ lower_inode = sdcardfs_lower_inode(inode); sdcardfs_set_lower_inode(inode, NULL); + set_top(SDCARDFS_I(inode), inode); iput(lower_inode); } -- GitLab From 2dd0dbb879d41ee3c0efdb0626f3823dd3a05c3b Mon Sep 17 00:00:00 2001 From: alvin_liang Date: Mon, 19 Sep 2016 16:59:12 +0800 Subject: [PATCH 1215/1442] ANDROID: sdcardfs: fix external storage exporting incorrect uid Symptom: App cannot write into per-app folder Root Cause: sdcardfs exports incorrect uid Solution: fix uid Project: All Note: Test done by RD: passed Change-Id: Iff64f6f40ba4c679f07f4426d3db6e6d0db7e3ca --- fs/sdcardfs/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 6d526bf3d956..2decea3d1e3e 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -274,7 +274,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb_info->options.fs_low_uid, sb_info->options.fs_low_gid, 00755);*/ } else { - setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false, sb->s_root->d_inode); + setup_derived_state(d_inode(sb->s_root), PERM_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fix_derived_permission(sb->s_root->d_inode); -- GitLab From 948041536c23bbde4fdcd77d9dc16156f0fc77e3 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 26 Sep 2016 14:48:22 -0700 Subject: [PATCH 1216/1442] ANDROID: sdcardfs: Move directory unlock before touch This removes a deadlock under low memory conditions. filp_open can call lookup_slow, which will attempt to lock the parent. Change-Id: I940643d0793f5051d1e79a56f4da2fa8ca3d8ff7 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/inode.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 67bcee2c379a..3c353c95ef3e 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -296,14 +296,17 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode copied_fs = copy_fs_struct(current->fs); if (!copied_fs) { err = -ENOMEM; + unlock_dir(lower_parent_dentry); goto out_unlock; } current->fs = copied_fs; current->fs->umask = 0; err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode); - if (err) + if (err) { + unlock_dir(lower_parent_dentry); goto out; + } /* if it is a local obb dentry, setup it with the base obbpath */ if(need_graft_path(dentry)) { @@ -325,14 +328,18 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid); - if (err) + if (err) { + unlock_dir(lower_parent_dentry); goto out; + } fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); /* update number of links on parent directory */ set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink); + unlock_dir(lower_parent_dentry); + if ((!sbi->options.multiuser) && (!strcasecmp(dentry->d_name.name, "obb")) && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) make_nomedia_in_obb = 1; @@ -352,7 +359,6 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode current->fs = saved_fs; free_fs_struct(copied_fs); out_unlock: - unlock_dir(lower_parent_dentry); sdcardfs_put_lower_path(dentry, &lower_path); out_revert: REVERT_CRED(saved_cred); -- GitLab From 1aaf05b35058e3a85b5a8bc1b37a908192857813 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 26 Oct 2016 15:29:51 -0700 Subject: [PATCH 1217/1442] ANDROID: mnt: Add filesystem private data to mount points This starts to add private data associated directly to mount points. The intent is to give filesystems a sense of where they have come from, as a means of letting a filesystem take different actions based on this information. Change-Id: Ie769d7b3bb2f5972afe05c1bf16cf88c91647ab2 Signed-off-by: Daniel Rosenberg --- fs/namespace.c | 28 +++++++++++++++++++++++++++- fs/pnode.c | 14 ++++++++++++++ fs/pnode.h | 1 + include/linux/fs.h | 3 +++ include/linux/mount.h | 1 + 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index e6c234b1a645..952b9e224cd0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -580,6 +580,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) static void free_vfsmnt(struct mount *mnt) { + kfree(mnt->mnt.data); kfree_const(mnt->mnt_devname); #ifdef CONFIG_SMP free_percpu(mnt->mnt_pcp); @@ -948,11 +949,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (!mnt) return ERR_PTR(-ENOMEM); + mnt->mnt.data = NULL; + if (type->alloc_mnt_data) { + mnt->mnt.data = type->alloc_mnt_data(); + if (!mnt->mnt.data) { + mnt_free_id(mnt); + free_vfsmnt(mnt); + return ERR_PTR(-ENOMEM); + } + } if (flags & MS_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; root = mount_fs(type, flags, name, data); if (IS_ERR(root)) { + kfree(mnt->mnt.data); mnt_free_id(mnt); free_vfsmnt(mnt); return ERR_CAST(root); @@ -980,6 +991,14 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, if (!mnt) return ERR_PTR(-ENOMEM); + if (sb->s_op->clone_mnt_data) { + mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data); + if (!mnt->mnt.data) { + err = -ENOMEM; + goto out_free; + } + } + if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) mnt->mnt_group_id = 0; /* not a peer of original */ else @@ -1048,6 +1067,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, return mnt; out_free: + kfree(mnt->mnt.data); mnt_free_id(mnt); free_vfsmnt(mnt); return ERR_PTR(err); @@ -2253,8 +2273,14 @@ static int do_remount(struct path *path, int flags, int mnt_flags, err = change_mount_flags(path->mnt, flags); else if (!capable(CAP_SYS_ADMIN)) err = -EPERM; - else + else { err = do_remount_sb(sb, flags, data, 0); + namespace_lock(); + lock_mount_hash(); + propagate_remount(mnt); + unlock_mount_hash(); + namespace_unlock(); + } if (!err) { lock_mount_hash(); mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; diff --git a/fs/pnode.c b/fs/pnode.c index 234a9ac49958..867a6cc2e3b8 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -458,3 +458,17 @@ int propagate_umount(struct list_head *list) __propagate_umount(mnt); return 0; } + +int propagate_remount(struct mount *mnt) { + struct mount *m; + struct super_block *sb = mnt->mnt.mnt_sb; + int ret = 0; + + if (sb->s_op->copy_mnt_data) { + for (m = first_slave(mnt); m->mnt_slave.next != &mnt->mnt_slave_list; m = next_slave(m)) { + sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data); + } + } + + return ret; +} diff --git a/fs/pnode.h b/fs/pnode.h index 550f5a8b4fcf..2c2736646810 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -44,6 +44,7 @@ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); +int propagate_remount(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..5a6339d97adc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1799,6 +1799,8 @@ struct super_operations { int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); + void *(*clone_mnt_data) (void *); + void (*copy_mnt_data) (void *, void *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); @@ -2035,6 +2037,7 @@ struct file_system_type { #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); + void *(*alloc_mnt_data) (void); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; diff --git a/include/linux/mount.h b/include/linux/mount.h index 1172cce949a4..4f333413934a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -67,6 +67,7 @@ struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; + void *data; }; struct file; /* forward dec */ -- GitLab From 29f888e73023efb641046b612ef5cd529bfa7b7a Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 26 Oct 2016 15:58:22 -0700 Subject: [PATCH 1218/1442] ANDROID: vfs: Allow filesystems to access their private mount data Now we pass the vfsmount when mounting and remounting. This allows the filesystem to actually set up the mount specific data, although we can't quite do anything with it yet. show_options is expanded to include data that lives with the mount. To avoid changing existing filesystems, these have been added as new vfs functions. Change-Id: If80670bfad9f287abb8ac22457e1b034c9697097 Signed-off-by: Daniel Rosenberg --- fs/internal.h | 4 +++- fs/namespace.c | 4 ++-- fs/proc_namespace.c | 8 ++++++-- fs/super.c | 28 +++++++++++++++++++++++----- include/linux/fs.h | 4 ++++ 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index f4da3341b4a3..15fe2aac4e2c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -88,9 +88,11 @@ extern struct file *get_empty_filp(void); * super.c */ extern int do_remount_sb(struct super_block *, int, void *, int); +extern int do_remount_sb2(struct vfsmount *, struct super_block *, int, + void *, int); extern bool trylock_super(struct super_block *sb); extern struct dentry *mount_fs(struct file_system_type *, - int, const char *, void *); + int, const char *, struct vfsmount *, void *); extern struct super_block *user_get_super(dev_t); /* diff --git a/fs/namespace.c b/fs/namespace.c index 952b9e224cd0..530d30d29640 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -961,7 +961,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (flags & MS_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; - root = mount_fs(type, flags, name, data); + root = mount_fs(type, flags, name, &mnt->mnt, data); if (IS_ERR(root)) { kfree(mnt->mnt.data); mnt_free_id(mnt); @@ -2274,7 +2274,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else if (!capable(CAP_SYS_ADMIN)) err = -EPERM; else { - err = do_remount_sb(sb, flags, data, 0); + err = do_remount_sb2(path->mnt, sb, flags, data, 0); namespace_lock(); lock_mount_hash(); propagate_remount(mnt); diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 3f1190d18991..6863773aff25 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -118,7 +118,9 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) if (err) goto out; show_mnt_opts(m, mnt); - if (sb->s_op->show_options) + if (sb->s_op->show_options2) + err = sb->s_op->show_options2(mnt, m, mnt_path.dentry); + else if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt_path.dentry); seq_puts(m, " 0 0\n"); out: @@ -180,7 +182,9 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) err = show_sb_opts(m, sb); if (err) goto out; - if (sb->s_op->show_options) + if (sb->s_op->show_options2) { + err = sb->s_op->show_options2(mnt, m, mnt->mnt_root); + } else if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt->mnt_root); seq_putc(m, '\n'); out: diff --git a/fs/super.c b/fs/super.c index 0bed501a60be..719579f5e98e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -750,7 +750,8 @@ struct super_block *user_get_super(dev_t dev) } /** - * do_remount_sb - asks filesystem to change mount options. + * do_remount_sb2 - asks filesystem to change mount options. + * @mnt: mount we are looking at * @sb: superblock in question * @flags: numeric part of options * @data: the rest of options @@ -758,7 +759,7 @@ struct super_block *user_get_super(dev_t dev) * * Alters the mount options of a mounted file system. */ -int do_remount_sb(struct super_block *sb, int flags, void *data, int force) +int do_remount_sb2(struct vfsmount *mnt, struct super_block *sb, int flags, void *data, int force) { int retval; int remount_ro; @@ -800,7 +801,16 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) } } - if (sb->s_op->remount_fs) { + if (mnt && sb->s_op->remount_fs2) { + retval = sb->s_op->remount_fs2(mnt, sb, &flags, data); + if (retval) { + if (!force) + goto cancel_readonly; + /* If forced remount, go ahead despite any errors */ + WARN(1, "forced remount of a %s fs returned %i\n", + sb->s_type->name, retval); + } + } else if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); if (retval) { if (!force) @@ -832,6 +842,11 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) return retval; } +int do_remount_sb(struct super_block *sb, int flags, void *data, int force) +{ + return do_remount_sb2(NULL, sb, flags, data, force); +} + static void do_emergency_remount(struct work_struct *work) { struct super_block *sb, *p = NULL; @@ -1157,7 +1172,7 @@ struct dentry *mount_single(struct file_system_type *fs_type, EXPORT_SYMBOL(mount_single); struct dentry * -mount_fs(struct file_system_type *type, int flags, const char *name, void *data) +mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data) { struct dentry *root; struct super_block *sb; @@ -1174,7 +1189,10 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) goto out_free_secdata; } - root = type->mount(type, flags, name, data); + if (type->mount2) + root = type->mount2(mnt, type, flags, name, data); + else + root = type->mount(type, flags, name, data); if (IS_ERR(root)) { error = PTR_ERR(root); goto out_free_secdata; diff --git a/include/linux/fs.h b/include/linux/fs.h index 5a6339d97adc..e811c804b86a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1799,11 +1799,13 @@ struct super_operations { int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); + int (*remount_fs2) (struct vfsmount *, struct super_block *, int *, char *); void *(*clone_mnt_data) (void *); void (*copy_mnt_data) (void *, void *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); + int (*show_options2)(struct vfsmount *,struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); @@ -2037,6 +2039,8 @@ struct file_system_type { #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); + struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int, + const char *, void *); void *(*alloc_mnt_data) (void); void (*kill_sb) (struct super_block *); struct module *owner; -- GitLab From 2757e9bdf498015409dba42e5a1e5f1c40a263c0 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 26 Oct 2016 16:33:11 -0700 Subject: [PATCH 1219/1442] ANDROID: vfs: Add setattr2 for filesystems with per mount permissions This allows filesystems to use their mount private data to influence the permssions they use in setattr2. It has been separated into a new call to avoid disrupting current setattr users. Change-Id: I19959038309284448f1b7f232d579674ef546385 Signed-off-by: Daniel Rosenberg --- fs/attr.c | 12 ++++++++++-- fs/coredump.c | 2 +- fs/inode.c | 6 +++--- fs/namei.c | 2 +- fs/open.c | 21 ++++++++++++++------- fs/utimes.c | 2 +- include/linux/fs.h | 4 ++++ 7 files changed, 34 insertions(+), 15 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index c902b3d53508..20d680d3eef4 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -200,7 +200,7 @@ EXPORT_SYMBOL(setattr_copy); * the file open for write, as there can be no conflicting delegation in * that case. */ -int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) +int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; @@ -307,7 +307,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de if (error) return error; - if (inode->i_op->setattr) + if (mnt && inode->i_op->setattr2) + error = inode->i_op->setattr2(mnt, dentry, attr); + else if (inode->i_op->setattr) error = inode->i_op->setattr(dentry, attr); else error = simple_setattr(dentry, attr); @@ -320,4 +322,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de return error; } +EXPORT_SYMBOL(notify_change2); + +int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) +{ + return notify_change2(NULL, dentry, attr, delegated_inode); +} EXPORT_SYMBOL(notify_change); diff --git a/fs/coredump.c b/fs/coredump.c index eb9c92c9b20f..8bdda8e660d8 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -744,7 +744,7 @@ void do_coredump(const siginfo_t *siginfo) goto close_fail; if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) goto close_fail; - if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) + if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; } diff --git a/fs/inode.c b/fs/inode.c index 88110fd0b282..0aaebd1de454 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1780,7 +1780,7 @@ int dentry_needs_remove_privs(struct dentry *dentry) return mask; } -static int __remove_privs(struct dentry *dentry, int kill) +static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill) { struct iattr newattrs; @@ -1789,7 +1789,7 @@ static int __remove_privs(struct dentry *dentry, int kill) * Note we call this on write, so notify_change will not * encounter any conflicting delegations: */ - return notify_change(dentry, &newattrs, NULL); + return notify_change2(mnt, dentry, &newattrs, NULL); } /* @@ -1811,7 +1811,7 @@ int file_remove_privs(struct file *file) if (kill < 0) return kill; if (kill) - error = __remove_privs(dentry, kill); + error = __remove_privs(file->f_path.mnt, dentry, kill); if (!error) inode_has_no_xattr(inode); diff --git a/fs/namei.c b/fs/namei.c index 5b4eed221530..03d427ccd226 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2957,7 +2957,7 @@ static int handle_truncate(struct file *filp) if (!error) error = security_path_truncate(path); if (!error) { - error = do_truncate(path->dentry, 0, + error = do_truncate2(path->mnt, path->dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, filp); } diff --git a/fs/open.c b/fs/open.c index d3ed8171e8e0..53efcd329b5b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -34,8 +34,8 @@ #include "internal.h" -int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, - struct file *filp) +int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length, + unsigned int time_attrs, struct file *filp) { int ret; struct iattr newattrs; @@ -60,10 +60,15 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, inode_lock(dentry->d_inode); /* Note any delegations or leases have already been broken: */ - ret = notify_change(dentry, &newattrs, NULL); + ret = notify_change2(mnt, dentry, &newattrs, NULL); inode_unlock(dentry->d_inode); return ret; } +int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + struct file *filp) +{ + return do_truncate2(NULL, dentry, length, time_attrs, filp); +} long vfs_truncate(const struct path *path, loff_t length) { @@ -117,7 +122,7 @@ long vfs_truncate(const struct path *path, loff_t length) if (!error) error = security_path_truncate(path); if (!error) - error = do_truncate(path->dentry, length, 0, NULL); + error = do_truncate2(mnt, path->dentry, length, 0, NULL); put_write_and_out: put_write_access(upperdentry->d_inode); @@ -166,6 +171,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; + struct vfsmount *mnt; struct fd f; int error; @@ -182,6 +188,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) small = 0; dentry = f.file->f_path.dentry; + mnt = f.file->f_path.mnt; inode = dentry->d_inode; error = -EINVAL; if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) @@ -201,7 +208,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) if (!error) error = security_path_truncate(&f.file->f_path); if (!error) - error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); + error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: fdput(f); @@ -533,7 +540,7 @@ static int chmod_common(const struct path *path, umode_t mode) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); out_unlock: inode_unlock(inode); if (delegated_inode) { @@ -613,7 +620,7 @@ static int chown_common(const struct path *path, uid_t user, gid_t group) inode_lock(inode); error = security_path_chown(path, uid, gid); if (!error) - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); diff --git a/fs/utimes.c b/fs/utimes.c index 22307cdf7014..87ce37bcaa84 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -91,7 +91,7 @@ static int utimes_common(struct path *path, struct timespec *times) } retry_deleg: inode_lock(inode); - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index e811c804b86a..49d4832d3d2f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1751,6 +1751,7 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); + int (*setattr2) (struct vfsmount *, struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, @@ -2340,6 +2341,8 @@ struct filename { extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); +extern int do_truncate2(struct vfsmount *, struct dentry *, loff_t start, + unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern long do_sys_open(int dfd, const char __user *filename, int flags, @@ -2582,6 +2585,7 @@ extern void emergency_remount(void); extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *, struct inode **); +extern int notify_change2(struct vfsmount *, struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); -- GitLab From bbcd0ffae56c121c0618a2b7df875cc8104a4680 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 26 Oct 2016 16:27:45 -0700 Subject: [PATCH 1220/1442] ANDROID: vfs: Add permission2 for filesystems with per mount permissions This allows filesystems to use their mount private data to influence the permssions they return in permission2. It has been separated into a new call to avoid disrupting current permission users. Change-Id: I9d416e3b8b6eca84ef3e336bd2af89ddd51df6ca Signed-off-by: Daniel Rosenberg --- fs/attr.c | 2 +- fs/exec.c | 2 +- fs/namei.c | 172 +++++++++++++++++++++-------- fs/notify/fanotify/fanotify_user.c | 2 +- fs/notify/inotify/inotify_user.c | 2 +- fs/open.c | 16 ++- include/linux/fs.h | 11 ++ include/linux/namei.h | 1 + ipc/mqueue.c | 10 +- security/inode.c | 2 +- 10 files changed, 156 insertions(+), 64 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 20d680d3eef4..c4093c5196be 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -224,7 +224,7 @@ int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * return -EPERM; if (!inode_owner_or_capable(inode)) { - error = inode_permission(inode, MAY_WRITE); + error = inode_permission2(mnt, inode, MAY_WRITE); if (error) return error; } diff --git a/fs/exec.c b/fs/exec.c index 4e497b9ee71e..c654feecab72 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1275,7 +1275,7 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { - if (inode_permission(file_inode(file), MAY_READ) < 0) + if (inode_permission2(file->f_path.mnt, file_inode(file), MAY_READ) < 0) bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; } EXPORT_SYMBOL(would_dump); diff --git a/fs/namei.c b/fs/namei.c index 03d427ccd226..dc469b4648c2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -375,9 +375,11 @@ EXPORT_SYMBOL(generic_permission); * flag in inode->i_opflags, that says "this has not special * permission function, use the fast case". */ -static inline int do_inode_permission(struct inode *inode, int mask) +static inline int do_inode_permission(struct vfsmount *mnt, struct inode *inode, int mask) { if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { + if (likely(mnt && inode->i_op->permission2)) + return inode->i_op->permission2(mnt, inode, mask); if (likely(inode->i_op->permission)) return inode->i_op->permission(inode, mask); @@ -401,7 +403,7 @@ static inline int do_inode_permission(struct inode *inode, int mask) * This does not check for a read-only file system. You probably want * inode_permission(). */ -int __inode_permission(struct inode *inode, int mask) +int __inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask) { int retval; @@ -421,7 +423,7 @@ int __inode_permission(struct inode *inode, int mask) return -EACCES; } - retval = do_inode_permission(inode, mask); + retval = do_inode_permission(mnt, inode, mask); if (retval) return retval; @@ -429,7 +431,14 @@ int __inode_permission(struct inode *inode, int mask) if (retval) return retval; - return security_inode_permission(inode, mask); + retval = security_inode_permission(inode, mask); + return retval; +} +EXPORT_SYMBOL(__inode_permission2); + +int __inode_permission(struct inode *inode, int mask) +{ + return __inode_permission2(NULL, inode, mask); } EXPORT_SYMBOL(__inode_permission); @@ -465,14 +474,20 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. */ -int inode_permission(struct inode *inode, int mask) +int inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask) { int retval; retval = sb_permission(inode->i_sb, inode, mask); if (retval) return retval; - return __inode_permission(inode, mask); + return __inode_permission2(mnt, inode, mask); +} +EXPORT_SYMBOL(inode_permission2); + +int inode_permission(struct inode *inode, int mask) +{ + return inode_permission2(NULL, inode, mask); } EXPORT_SYMBOL(inode_permission); @@ -1669,13 +1684,13 @@ static struct dentry *lookup_slow(const struct qstr *name, static inline int may_lookup(struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { - int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); + int err = inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; if (unlazy_walk(nd, NULL, 0)) return -ECHILD; } - return inode_permission(nd->inode, MAY_EXEC); + return inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC); } static inline int handle_dots(struct nameidata *nd, int type) @@ -2146,11 +2161,12 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->depth = 0; if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; + struct vfsmount *mnt = nd->root.mnt; struct inode *inode = root->d_inode; if (*s) { if (!d_can_lookup(root)) return ERR_PTR(-ENOTDIR); - retval = inode_permission(inode, MAY_EXEC); + retval = inode_permission2(mnt, inode, MAY_EXEC); if (retval) return ERR_PTR(retval); } @@ -2415,6 +2431,7 @@ EXPORT_SYMBOL(vfs_path_lookup); /** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup + * @mnt: mount we are looking up on * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * @@ -2423,7 +2440,7 @@ EXPORT_SYMBOL(vfs_path_lookup); * * The caller must hold base->i_mutex. */ -struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +struct dentry *lookup_one_len2(const char *name, struct vfsmount *mnt, struct dentry *base, int len) { struct qstr this; unsigned int c; @@ -2457,12 +2474,18 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) return ERR_PTR(err); } - err = inode_permission(base->d_inode, MAY_EXEC); + err = inode_permission2(mnt, base->d_inode, MAY_EXEC); if (err) return ERR_PTR(err); return __lookup_hash(&this, base, 0); } +EXPORT_SYMBOL(lookup_one_len2); + +struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) +{ + return lookup_one_len2(name, NULL, base, len); +} EXPORT_SYMBOL(lookup_one_len); /** @@ -2765,7 +2788,7 @@ EXPORT_SYMBOL(__check_sticky); * 11. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) +static int may_delete(struct vfsmount *mnt, struct inode *dir, struct dentry *victim, bool isdir) { struct inode *inode = d_backing_inode(victim); int error; @@ -2777,7 +2800,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + error = inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) @@ -2809,7 +2832,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) * 4. We should have write and exec permissions on dir * 5. We can't do it if dir is immutable (done in permission()) */ -static inline int may_create(struct inode *dir, struct dentry *child) +static inline int may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child) { struct user_namespace *s_user_ns; audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); @@ -2821,7 +2844,7 @@ static inline int may_create(struct inode *dir, struct dentry *child) if (!kuid_has_mapping(s_user_ns, current_fsuid()) || !kgid_has_mapping(s_user_ns, current_fsgid())) return -EOVERFLOW; - return inode_permission(dir, MAY_WRITE | MAY_EXEC); + return inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC); } /* @@ -2868,10 +2891,10 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) } EXPORT_SYMBOL(unlock_rename); -int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool want_excl) +int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, + umode_t mode, bool want_excl) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -2887,6 +2910,13 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_create2); + +int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool want_excl) +{ + return vfs_create2(NULL, dir, dentry, mode, want_excl); +} EXPORT_SYMBOL(vfs_create); bool may_open_dev(const struct path *path) @@ -2898,6 +2928,7 @@ bool may_open_dev(const struct path *path) static int may_open(struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; + struct vfsmount *mnt = path->mnt; struct inode *inode = dentry->d_inode; int error; @@ -2922,7 +2953,7 @@ static int may_open(struct path *path, int acc_mode, int flag) break; } - error = inode_permission(inode, MAY_OPEN | acc_mode); + error = inode_permission2(mnt, inode, MAY_OPEN | acc_mode); if (error) return error; @@ -2978,7 +3009,7 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m if (error) return error; - error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); + error = inode_permission2(dir->mnt, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -3409,7 +3440,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, goto out; dir = path.dentry->d_inode; /* we want directory to be writable */ - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + error = inode_permission2(nd->path.mnt, dir, MAY_WRITE | MAY_EXEC); if (error) goto out2; if (!dir->i_op->tmpfile) { @@ -3662,9 +3693,9 @@ inline struct dentry *user_path_create(int dfd, const char __user *pathname, } EXPORT_SYMBOL(user_path_create); -int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) +int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -3688,6 +3719,12 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_mknod2); + +int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) +{ + return vfs_mknod2(NULL, dir, dentry, mode, dev); +} EXPORT_SYMBOL(vfs_mknod); static int may_mknod(umode_t mode) @@ -3730,12 +3767,12 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, goto out; switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(path.dentry->d_inode,dentry,mode,true); + error = vfs_create2(path.mnt, path.dentry->d_inode,dentry,mode,true); if (!error) ima_post_path_mknod(dentry); break; case S_IFCHR: case S_IFBLK: - error = vfs_mknod(path.dentry->d_inode,dentry,mode, + error = vfs_mknod2(path.mnt, path.dentry->d_inode,dentry,mode, new_decode_dev(dev)); break; case S_IFIFO: case S_IFSOCK: @@ -3756,9 +3793,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d return sys_mknodat(AT_FDCWD, filename, mode, dev); } -int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); unsigned max_links = dir->i_sb->s_max_links; if (error) @@ -3780,6 +3817,12 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fsnotify_mkdir(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_mkdir2); + +int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + return vfs_mkdir2(NULL, dir, dentry, mode); +} EXPORT_SYMBOL(vfs_mkdir); SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) @@ -3798,7 +3841,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) mode &= ~current_umask(); error = security_path_mkdir(&path, dentry, mode); if (!error) - error = vfs_mkdir(path.dentry->d_inode, dentry, mode); + error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -3812,9 +3855,9 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) return sys_mkdirat(AT_FDCWD, pathname, mode); } -int vfs_rmdir(struct inode *dir, struct dentry *dentry) +int vfs_rmdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry) { - int error = may_delete(dir, dentry, 1); + int error = may_delete(mnt, dir, dentry, 1); if (error) return error; @@ -3849,6 +3892,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) d_delete(dentry); return error; } +int vfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + return vfs_rmdir2(NULL, dir, dentry); +} EXPORT_SYMBOL(vfs_rmdir); static long do_rmdir(int dfd, const char __user *pathname) @@ -3894,7 +3941,7 @@ static long do_rmdir(int dfd, const char __user *pathname) error = security_path_rmdir(&path, dentry); if (error) goto exit3; - error = vfs_rmdir(path.dentry->d_inode, dentry); + error = vfs_rmdir2(path.mnt, path.dentry->d_inode, dentry); exit3: dput(dentry); exit2: @@ -3933,10 +3980,10 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. */ -int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) +int vfs_unlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { struct inode *target = dentry->d_inode; - int error = may_delete(dir, dentry, 0); + int error = may_delete(mnt, dir, dentry, 0); if (error) return error; @@ -3971,6 +4018,12 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate return error; } +EXPORT_SYMBOL(vfs_unlink2); + +int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) +{ + return vfs_unlink2(NULL, dir, dentry, delegated_inode); +} EXPORT_SYMBOL(vfs_unlink); /* @@ -4018,7 +4071,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = security_path_unlink(&path, dentry); if (error) goto exit2; - error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode); + error = vfs_unlink2(path.mnt, path.dentry->d_inode, dentry, &delegated_inode); exit2: dput(dentry); } @@ -4068,9 +4121,9 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) return do_unlinkat(AT_FDCWD, pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) +int vfs_symlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, const char *oldname) { - int error = may_create(dir, dentry); + int error = may_create(mnt, dir, dentry); if (error) return error; @@ -4087,6 +4140,12 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) fsnotify_create(dir, dentry); return error; } +EXPORT_SYMBOL(vfs_symlink2); + +int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) +{ + return vfs_symlink2(NULL, dir, dentry, oldname); +} EXPORT_SYMBOL(vfs_symlink); SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, @@ -4109,7 +4168,7 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, error = security_path_symlink(&path, dentry, from->name); if (!error) - error = vfs_symlink(path.dentry->d_inode, dentry, from->name); + error = vfs_symlink2(path.mnt, path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -4144,7 +4203,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. */ -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) +int vfs_link2(struct vfsmount *mnt, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -4153,7 +4212,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de if (!inode) return -ENOENT; - error = may_create(dir, new_dentry); + error = may_create(mnt, dir, new_dentry); if (error) return error; @@ -4203,6 +4262,12 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de fsnotify_link(dir, inode, new_dentry); return error; } +EXPORT_SYMBOL(vfs_link2); + +int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) +{ + return vfs_link2(NULL, old_dentry, dir, new_dentry, delegated_inode); +} EXPORT_SYMBOL(vfs_link); /* @@ -4258,7 +4323,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; - error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); + error = vfs_link2(old_path.mnt, old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); if (delegated_inode) { @@ -4333,7 +4398,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, +int vfs_rename2(struct vfsmount *mnt, + struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, struct inode **delegated_inode, unsigned int flags) { @@ -4352,19 +4418,19 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (d_real_inode(old_dentry) == d_real_inode(new_dentry)) return 0; - error = may_delete(old_dir, old_dentry, is_dir); + error = may_delete(mnt, old_dir, old_dentry, is_dir); if (error) return error; if (!target) { - error = may_create(new_dir, new_dentry); + error = may_create(mnt, new_dir, new_dentry); } else { new_is_dir = d_is_dir(new_dentry); if (!(flags & RENAME_EXCHANGE)) - error = may_delete(new_dir, new_dentry, is_dir); + error = may_delete(mnt, new_dir, new_dentry, is_dir); else - error = may_delete(new_dir, new_dentry, new_is_dir); + error = may_delete(mnt, new_dir, new_dentry, new_is_dir); } if (error) return error; @@ -4378,12 +4444,12 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ if (new_dir != old_dir) { if (is_dir) { - error = inode_permission(source, MAY_WRITE); + error = inode_permission2(mnt, source, MAY_WRITE); if (error) return error; } if ((flags & RENAME_EXCHANGE) && new_is_dir) { - error = inode_permission(target, MAY_WRITE); + error = inode_permission2(mnt, target, MAY_WRITE); if (error) return error; } @@ -4460,6 +4526,14 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, return error; } +EXPORT_SYMBOL(vfs_rename2); + +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode, unsigned int flags) +{ + return vfs_rename2(NULL, old_dir, old_dentry, new_dir, new_dentry, delegated_inode, flags); +} EXPORT_SYMBOL(vfs_rename); SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, @@ -4573,7 +4647,7 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, &new_path, new_dentry, flags); if (error) goto exit5; - error = vfs_rename(old_path.dentry->d_inode, old_dentry, + error = vfs_rename2(old_path.mnt, old_path.dentry->d_inode, old_dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode, flags); exit5: @@ -4618,7 +4692,7 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna int vfs_whiteout(struct inode *dir, struct dentry *dentry) { - int error = may_create(dir, dentry); + int error = may_create(NULL, dir, dentry); if (error) return error; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 7ebfca6a1427..7f99c96014b3 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -488,7 +488,7 @@ static int fanotify_find_path(int dfd, const char __user *filename, } /* you can only watch an inode if you have read permissions on it */ - ret = inode_permission(path->dentry->d_inode, MAY_READ); + ret = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ); if (ret) path_put(path); out: diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 4dc09da062c6..4da5c6a1134f 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -337,7 +337,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns if (error) return error; /* you can only watch an inode if you have read permissions on it */ - error = inode_permission(path->dentry->d_inode, MAY_READ); + error = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ); if (error) path_put(path); return error; diff --git a/fs/open.c b/fs/open.c index 53efcd329b5b..568749b035fa 100644 --- a/fs/open.c +++ b/fs/open.c @@ -73,10 +73,12 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, long vfs_truncate(const struct path *path, loff_t length) { struct inode *inode; + struct vfsmount *mnt; struct dentry *upperdentry; long error; inode = path->dentry->d_inode; + mnt = path->mnt; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ if (S_ISDIR(inode->i_mode)) @@ -88,7 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length) if (error) goto out; - error = inode_permission(inode, MAY_WRITE); + error = inode_permission2(mnt, inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; @@ -364,6 +366,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) struct cred *override_cred; struct path path; struct inode *inode; + struct vfsmount *mnt; int res; unsigned int lookup_flags = LOOKUP_FOLLOW; @@ -394,6 +397,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) goto out; inode = d_backing_inode(path.dentry); + mnt = path.mnt; if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* @@ -405,7 +409,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) goto out_path_release; } - res = inode_permission(inode, mode | MAY_ACCESS); + res = inode_permission2(mnt, inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; @@ -449,7 +453,7 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) if (error) goto out; - error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -469,6 +473,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); struct inode *inode; + struct vfsmount *mnt; int error = -EBADF; error = -EBADF; @@ -476,12 +481,13 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) goto out; inode = file_inode(f.file); + mnt = f.file->f_path.mnt; error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) goto out_putf; - error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission2(mnt, inode, MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: @@ -500,7 +506,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) if (error) goto out; - error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 49d4832d3d2f..bed7a849371d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1603,13 +1603,21 @@ extern bool inode_owner_or_capable(const struct inode *inode); * VFS helper functions.. */ extern int vfs_create(struct inode *, struct dentry *, umode_t, bool); +extern int vfs_create2(struct vfsmount *, struct inode *, struct dentry *, umode_t, bool); extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); +extern int vfs_mkdir2(struct vfsmount *, struct inode *, struct dentry *, umode_t); extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); +extern int vfs_mknod2(struct vfsmount *, struct inode *, struct dentry *, umode_t, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); +extern int vfs_symlink2(struct vfsmount *, struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); +extern int vfs_link2(struct vfsmount *, struct dentry *, struct inode *, struct dentry *, struct inode **); extern int vfs_rmdir(struct inode *, struct dentry *); +extern int vfs_rmdir2(struct vfsmount *, struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); +extern int vfs_unlink2(struct vfsmount *, struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); +extern int vfs_rename2(struct vfsmount *, struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); /* @@ -1737,6 +1745,7 @@ struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct inode *, int); + int (*permission2) (struct vfsmount *, struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); @@ -2587,7 +2596,9 @@ extern sector_t bmap(struct inode *, sector_t); extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int notify_change2(struct vfsmount *, struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); +extern int inode_permission2(struct vfsmount *, struct inode *, int); extern int __inode_permission(struct inode *, int); +extern int __inode_permission2(struct vfsmount *, struct inode *, int); extern int generic_permission(struct inode *, int); extern int __check_sticky(struct inode *dir, struct inode *inode); diff --git a/include/linux/namei.h b/include/linux/namei.h index a2866f6073e1..cf437f56baf4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -82,6 +82,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); +extern struct dentry *lookup_one_len2(const char *, struct vfsmount *mnt, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern int follow_down_one(struct path *); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8cbd6e6894d5..a37a10bc07ea 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -745,7 +745,7 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, } mode &= ~current_umask(); - ret = vfs_create(dir, path->dentry, mode, true); + ret = vfs_create2(path->mnt, dir, path->dentry, mode, true); path->dentry->d_fsdata = NULL; if (ret) return ERR_PTR(ret); @@ -761,7 +761,7 @@ static struct file *do_open(struct path *path, int oflag) if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) return ERR_PTR(-EINVAL); acc = oflag2acc[oflag & O_ACCMODE]; - if (inode_permission(d_inode(path->dentry), acc)) + if (inode_permission2(path->mnt, d_inode(path->dentry), acc)) return ERR_PTR(-EACCES); return dentry_open(path, oflag, current_cred()); } @@ -794,7 +794,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, ro = mnt_want_write(mnt); /* we'll drop it in any case */ error = 0; inode_lock(d_inode(root)); - path.dentry = lookup_one_len(name->name, root, strlen(name->name)); + path.dentry = lookup_one_len2(name->name, mnt, root, strlen(name->name)); if (IS_ERR(path.dentry)) { error = PTR_ERR(path.dentry); goto out_putfd; @@ -865,7 +865,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) if (err) goto out_name; inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT); - dentry = lookup_one_len(name->name, mnt->mnt_root, + dentry = lookup_one_len2(name->name, mnt, mnt->mnt_root, strlen(name->name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -877,7 +877,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) err = -ENOENT; } else { ihold(inode); - err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL); + err = vfs_unlink2(mnt, d_inode(dentry->d_parent), dentry, NULL); } dput(dentry); diff --git a/security/inode.c b/security/inode.c index c83db05c15ab..b4531f2be0f1 100644 --- a/security/inode.c +++ b/security/inode.c @@ -100,7 +100,7 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode, dir = d_inode(parent); inode_lock(dir); - dentry = lookup_one_len(name, parent, strlen(name)); + dentry = lookup_one_len2(name, mount, parent, strlen(name)); if (IS_ERR(dentry)) goto out; -- GitLab From 1844d9e62363c346b020c21575c77ff8fe8c2331 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 26 Oct 2016 16:48:45 -0700 Subject: [PATCH 1221/1442] ANDROID: sdcardfs: User new permission2 functions Change-Id: Ic7e0fb8fdcebb31e657b079fe02ac834c4a50db9 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/inode.c | 25 +++++++++++++++++++------ fs/sdcardfs/sdcardfs.h | 4 ++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 3c353c95ef3e..dc64c9e2f5e7 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -54,6 +54,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, { int err; struct dentry *lower_dentry; + struct vfsmount *lower_dentry_mnt; struct dentry *lower_parent_dentry = NULL; struct path lower_path; const struct cred *saved_cred = NULL; @@ -73,6 +74,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_dentry_mnt = lower_path.mnt; lower_parent_dentry = lock_parent(lower_dentry); /* set last 16bytes of mode field to 0664 */ @@ -87,7 +89,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, } current->fs = copied_fs; current->fs->umask = 0; - err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); + err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) goto out; @@ -154,6 +156,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) { int err; struct dentry *lower_dentry; + struct vfsmount *lower_mnt; struct inode *lower_dir_inode = sdcardfs_lower_inode(dir); struct dentry *lower_dir_dentry; struct path lower_path; @@ -172,10 +175,11 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - err = vfs_unlink(lower_dir_inode, lower_dentry, NULL); + err = vfs_unlink2(lower_mnt, lower_dir_inode, lower_dentry, NULL); /* * Note: unlinking on top of NFS can cause silly-renamed files. @@ -256,6 +260,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode int err; int make_nomedia_in_obb = 0; struct dentry *lower_dentry; + struct vfsmount *lower_mnt; struct dentry *lower_parent_dentry = NULL; struct path lower_path; struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); @@ -286,6 +291,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* the lower_dentry is negative here */ sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; lower_parent_dentry = lock_parent(lower_dentry); /* set last 16bytes of mode field to 0775 */ @@ -301,7 +307,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } current->fs = copied_fs; current->fs->umask = 0; - err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode); + err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode); if (err) { unlock_dir(lower_parent_dentry); @@ -370,6 +376,7 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) { struct dentry *lower_dentry; struct dentry *lower_dir_dentry; + struct vfsmount *lower_mnt; int err; struct path lower_path; const struct cred *saved_cred = NULL; @@ -390,9 +397,10 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) sdcardfs_get_real_lower(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; lower_dir_dentry = lock_parent(lower_dentry); - err = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry); + err = vfs_rmdir2(lower_mnt, d_inode(lower_dir_dentry), lower_dentry); if (err) goto out; @@ -456,6 +464,7 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dentry = NULL; struct dentry *lower_old_dir_dentry = NULL; struct dentry *lower_new_dir_dentry = NULL; + struct vfsmount *lower_mnt = NULL; struct dentry *trap = NULL; struct dentry *new_parent = NULL; struct path lower_old_path, lower_new_path; @@ -477,6 +486,7 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, sdcardfs_get_lower_path(new_dentry, &lower_new_path); lower_old_dentry = lower_old_path.dentry; lower_new_dentry = lower_new_path.dentry; + lower_mnt = lower_old_path.mnt; lower_old_dir_dentry = dget_parent(lower_old_dentry); lower_new_dir_dentry = dget_parent(lower_new_dentry); @@ -492,7 +502,8 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } - err = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry, + err = vfs_rename2(lower_mnt, + d_inode(lower_old_dir_dentry), lower_old_dentry, d_inode(lower_new_dir_dentry), lower_new_dentry, NULL, 0); if (err) @@ -642,6 +653,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) { int err; struct dentry *lower_dentry; + struct vfsmount *lower_mnt; struct inode *inode; struct inode *lower_inode; struct path lower_path; @@ -675,6 +687,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; + lower_mnt = lower_path.mnt; lower_inode = sdcardfs_lower_inode(inode); /* prepare our own lower struct iattr (with the lower file) */ @@ -718,7 +731,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) * tries to open(), unlink(), then ftruncate() a file. */ mutex_lock(&d_inode(lower_dentry)->i_mutex); - err = notify_change(lower_dentry, &lower_ia, /* note: lower_ia */ + err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */ NULL); mutex_unlock(&d_inode(lower_dentry)->i_mutex); if (current->mm) diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index cfda98d257b6..5132f1dc5a4d 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -476,7 +476,7 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m goto out_unlock; } - err = vfs_mkdir(d_inode(parent.dentry), dent, mode); + err = vfs_mkdir2(parent.mnt, d_inode(parent.dentry), dent, mode); if (err) { if (err == -EEXIST) err = 0; @@ -487,7 +487,7 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m attrs.ia_gid = make_kgid(&init_user_ns, gid); attrs.ia_valid = ATTR_UID | ATTR_GID; mutex_lock(&d_inode(dent)->i_mutex); - notify_change(dent, &attrs, NULL); + notify_change2(parent.mnt, dent, &attrs, NULL); mutex_unlock(&d_inode(dent)->i_mutex); out_dput: -- GitLab From 317e770e66d67fb849ba5bf0df2976c26f86ebae Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 26 Oct 2016 17:36:05 -0700 Subject: [PATCH 1222/1442] ANDROID: sdcardfs: Add gid and mask to private mount data Adds support for mount2, remount2, and the functions to allocate/clone/copy the private data The next patch will switch over to actually using it. Change-Id: I8a43da26021d33401f655f0b2784ead161c575e3 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/main.c | 103 ++++++++++++++++++++++++++++++++++++----- fs/sdcardfs/sdcardfs.h | 8 ++++ fs/sdcardfs/super.c | 64 ++++++++++++++++++++++--- 3 files changed, 157 insertions(+), 18 deletions(-) diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 2decea3d1e3e..5400e7e63d27 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -49,7 +49,8 @@ static const match_table_t sdcardfs_tokens = { }; static int parse_options(struct super_block *sb, char *options, int silent, - int *debug, struct sdcardfs_mount_options *opts) + int *debug, struct sdcardfs_vfsmount_options *vfsopts, + struct sdcardfs_mount_options *opts) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -58,9 +59,11 @@ static int parse_options(struct super_block *sb, char *options, int silent, /* by default, we use AID_MEDIA_RW as uid, gid */ opts->fs_low_uid = AID_MEDIA_RW; opts->fs_low_gid = AID_MEDIA_RW; + vfsopts->mask = 0; opts->mask = 0; opts->multiuser = false; opts->fs_user_id = 0; + vfsopts->gid = 0; opts->gid = 0; /* by default, 0MB is reserved */ opts->reserved_mb = 0; @@ -95,6 +98,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, if (match_int(&args[0], &option)) return 0; opts->gid = option; + vfsopts->gid = option; break; case Opt_userid: if (match_int(&args[0], &option)) @@ -105,6 +109,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, if (match_int(&args[0], &option)) return 0; opts->mask = option; + vfsopts->mask = option; break; case Opt_multiuser: opts->multiuser = true; @@ -135,6 +140,65 @@ static int parse_options(struct super_block *sb, char *options, int silent, return 0; } +int parse_options_remount(struct super_block *sb, char *options, int silent, + struct sdcardfs_vfsmount_options *vfsopts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + int debug; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, sdcardfs_tokens, args); + + switch (token) { + case Opt_debug: + debug = 1; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + vfsopts->gid = option; + + break; + case Opt_mask: + if (match_int(&args[0], &option)) + return 0; + vfsopts->mask = option; + break; + case Opt_multiuser: + case Opt_userid: + case Opt_fsuid: + case Opt_fsgid: + case Opt_reserved_mb: + printk( KERN_WARNING "Option \"%s\" can't be changed during remount\n", p); + break; + /* unknown option */ + default: + if (!silent) { + printk( KERN_ERR "Unrecognized mount option \"%s\" " + "or missing value", p); + } + return -EINVAL; + } + } + + if (debug) { + printk( KERN_INFO "sdcardfs : options - debug:%d\n", debug); + printk( KERN_INFO "sdcardfs : options - gid:%d\n", vfsopts->gid); + printk( KERN_INFO "sdcardfs : options - mask:%d\n", vfsopts->mask); + } + + return 0; +} + #if 0 /* * our custom d_alloc_root work-alike @@ -172,14 +236,15 @@ EXPORT_SYMBOL_GPL(sdcardfs_super_list); * There is no need to lock the sdcardfs_super_info's rwsem as there is no * way anyone can have a reference to the superblock at this point in time. */ -static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, - void *raw_data, int silent) +static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, + const char *dev_name, void *raw_data, int silent) { int err = 0; int debug; struct super_block *lower_sb; struct path lower_path; struct sdcardfs_sb_info *sb_info; + struct sdcardfs_vfsmount_options *mnt_opt = mnt->data; struct inode *inode; printk(KERN_INFO "sdcardfs version 2.0\n"); @@ -212,7 +277,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb_info = sb->s_fs_info; /* parse options */ - err = parse_options(sb, raw_data, silent, &debug, &sb_info->options); + err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options); if (err) { printk(KERN_ERR "sdcardfs: invalid options\n"); goto out_freesbi; @@ -306,9 +371,9 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, } /* A feature which supports mount_nodev() with options */ -static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, const char *, void *, int)) +static struct dentry *mount_nodev_with_options(struct vfsmount *mnt, + struct file_system_type *fs_type, int flags, const char *dev_name, void *data, + int (*fill_super)(struct vfsmount *, struct super_block *, const char *, void *, int)) { int error; @@ -319,7 +384,7 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, s->s_flags = flags; - error = fill_super(s, dev_name, data, flags & MS_SILENT ? 1 : 0); + error = fill_super(mnt, s, dev_name, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); return ERR_PTR(error); @@ -328,15 +393,27 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, return dget(s->s_root); } -struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags, +static struct dentry *sdcardfs_mount(struct vfsmount *mnt, + struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { /* * dev_name is a lower_path_name, * raw_data is a option string. */ - return mount_nodev_with_options(fs_type, flags, dev_name, - raw_data, sdcardfs_read_super); + return mount_nodev_with_options(mnt, fs_type, flags, dev_name, + raw_data, sdcardfs_read_super); +} + +static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + WARN(1, "sdcardfs does not support mount. Use mount2.\n"); + return ERR_PTR(-EINVAL); +} + +void *sdcardfs_alloc_mnt_data(void) { + return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); } void sdcardfs_kill_sb(struct super_block *sb) { @@ -353,7 +430,9 @@ void sdcardfs_kill_sb(struct super_block *sb) { static struct file_system_type sdcardfs_fs_type = { .owner = THIS_MODULE, .name = SDCARDFS_NAME, - .mount = sdcardfs_mount, + .mount = sdcardfs_mount_wrn, + .mount2 = sdcardfs_mount, + .alloc_mnt_data = sdcardfs_alloc_mnt_data, .kill_sb = sdcardfs_kill_sb, .fs_flags = 0, }; diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 5132f1dc5a4d..22ef29857022 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -193,6 +193,14 @@ struct sdcardfs_mount_options { unsigned int reserved_mb; }; +struct sdcardfs_vfsmount_options { + gid_t gid; + mode_t mask; +}; + +extern int parse_options_remount(struct super_block *sb, char *options, int silent, + struct sdcardfs_vfsmount_options *vfsopts); + /* sdcardfs super-block data in memory */ struct sdcardfs_sb_info { struct super_block *sb; diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index 0a465395aab7..edda32b68dc0 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -108,6 +108,50 @@ static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options return err; } +/* + * @mnt: mount point we are remounting + * @sb: superblock we are remounting + * @flags: numeric mount options + * @options: mount options string + */ +static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb, + int *flags, char *options) +{ + int err = 0; + + /* + * The VFS will take care of "ro" and "rw" flags among others. We + * can safely accept a few flags (RDONLY, MANDLOCK), and honor + * SILENT, but anything else left over is an error. + */ + if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) { + printk(KERN_ERR + "sdcardfs: remount flags 0x%x unsupported\n", *flags); + err = -EINVAL; + } + printk(KERN_INFO "Remount options were %s for vfsmnt %p.\n", options, mnt); + err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data); + + + return err; +} + +static void* sdcardfs_clone_mnt_data(void *data) { + struct sdcardfs_vfsmount_options* opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); + struct sdcardfs_vfsmount_options* old = data; + if(!opt) return NULL; + opt->gid = old->gid; + opt->mask = old->mask; + return opt; +} + +static void sdcardfs_copy_mnt_data(void *data, void *newdata) { + struct sdcardfs_vfsmount_options* old = data; + struct sdcardfs_vfsmount_options* new = newdata; + old->gid = new->gid; + old->mask = new->mask; +} + /* * Called by iput() when the inode reference count reached zero * and the inode is not hashed anywhere. Used to clear anything @@ -191,19 +235,24 @@ static void sdcardfs_umount_begin(struct super_block *sb) lower_sb->s_op->umount_begin(lower_sb); } -static int sdcardfs_show_options(struct seq_file *m, struct dentry *root) +static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, struct dentry *root) { struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb); struct sdcardfs_mount_options *opts = &sbi->options; + struct sdcardfs_vfsmount_options *vfsopts = mnt->data; if (opts->fs_low_uid != 0) - seq_printf(m, ",uid=%u", opts->fs_low_uid); + seq_printf(m, ",fsuid=%u", opts->fs_low_uid); if (opts->fs_low_gid != 0) - seq_printf(m, ",gid=%u", opts->fs_low_gid); - + seq_printf(m, ",fsgid=%u", opts->fs_low_gid); + if (vfsopts->gid != 0) + seq_printf(m, ",gid=%u", vfsopts->gid); if (opts->multiuser) seq_printf(m, ",multiuser"); - + if (vfsopts->mask) + seq_printf(m, ",mask=%u", vfsopts->mask); + if (opts->fs_user_id) + seq_printf(m, ",userid=%u", opts->fs_user_id); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); @@ -214,9 +263,12 @@ const struct super_operations sdcardfs_sops = { .put_super = sdcardfs_put_super, .statfs = sdcardfs_statfs, .remount_fs = sdcardfs_remount_fs, + .remount_fs2 = sdcardfs_remount_fs2, + .clone_mnt_data = sdcardfs_clone_mnt_data, + .copy_mnt_data = sdcardfs_copy_mnt_data, .evict_inode = sdcardfs_evict_inode, .umount_begin = sdcardfs_umount_begin, - .show_options = sdcardfs_show_options, + .show_options2 = sdcardfs_show_options, .alloc_inode = sdcardfs_alloc_inode, .destroy_inode = sdcardfs_destroy_inode, .drop_inode = generic_delete_inode, -- GitLab From 90219273be12e416db458c150c8a73bb472517d8 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 26 Oct 2016 20:27:20 -0700 Subject: [PATCH 1223/1442] ANDROID: sdcardfs: Use per mount permissions This switches sdcardfs over to using permission2. Instead of mounting several sdcardfs instances onto the same underlaying directory, you bind mount a single mount several times, and remount with the options you want. These are stored in the private mount data, allowing you to maintain the same tree, but have different permissions for different mount points. Warning functions have been added for permission, as it should never be called, and the correct behavior is unclear. Change-Id: I841b1d70ec60cf2b866fa48edeb74a0b0f8334f5 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/derived_perm.c | 20 ++++-- fs/sdcardfs/inode.c | 127 ++++++++++++++++++++++++++++++------- fs/sdcardfs/lookup.c | 4 +- fs/sdcardfs/main.c | 8 +-- fs/sdcardfs/sdcardfs.h | 44 ++++++++----- 5 files changed, 150 insertions(+), 53 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 89daf69efbaa..066edbbb6ad6 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -141,13 +141,23 @@ void fixup_perms_recursive(struct dentry *dentry, const char* name, size_t len) info = SDCARDFS_I(d_inode(dentry)); if (needs_fixup(info->perm)) { + /* We need permission to fix up these values. + * Since permissions are based of of the mount, and + * we are accessing without the mount point, we create + * a fake mount with the permissions we will be using. + */ + struct vfsmount fakemnt; + struct sdcardfs_vfsmount_options opts; + fakemnt.data = &opts; + opts.gid = AID_SDCARD_RW; + opts.mask = 0; mutex_lock(&d_inode(dentry)->i_mutex); - child = lookup_one_len(name, dentry, len); + child = lookup_one_len2(name, &fakemnt, dentry, len); mutex_unlock(&d_inode(dentry)->i_mutex); if (!IS_ERR(child)) { - if (child->d_inode) { + if (d_inode(child)) { get_derived_permission(dentry, child); - fix_derived_permission(d_inode(child)); + fixup_tmp_permissions(d_inode(child)); } dput(child); } @@ -172,7 +182,7 @@ void fixup_top_recursive(struct dentry *parent) { if (d_inode(dentry)) { if (SDCARDFS_I(d_inode(parent))->top != SDCARDFS_I(d_inode(dentry))->top) { get_derived_permission(parent, dentry); - fix_derived_permission(d_inode(dentry)); + fixup_tmp_permissions(d_inode(dentry)); fixup_top_recursive(dentry); } } @@ -202,7 +212,7 @@ inline void update_derived_permission_lock(struct dentry *dentry) dput(parent); } } - fix_derived_permission(dentry->d_inode); + fixup_tmp_permissions(d_inode(dentry)); } int need_graft_path(struct dentry *dentry) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index dc64c9e2f5e7..76a6e8ad0736 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -531,7 +531,7 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* At this point, not all dentry information has been moved, so * we pass along new_dentry for the name.*/ get_derived_permission_new(new_dentry->d_parent, old_dentry, new_dentry); - fix_derived_permission(d_inode(old_dentry)); + fixup_tmp_permissions(d_inode(old_dentry)); fixup_top_recursive(old_dentry); out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); @@ -601,26 +601,63 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) } #endif -static int sdcardfs_permission(struct inode *inode, int mask) +static int sdcardfs_permission_wrn(struct inode *inode, int mask) +{ + WARN(1, "sdcardfs does not support permission. Use permission2.\n"); + return -EINVAL; +} + +void copy_attrs(struct inode *dest, const struct inode *src) +{ + dest->i_mode = src->i_mode; + dest->i_uid = src->i_uid; + dest->i_gid = src->i_gid; + dest->i_rdev = src->i_rdev; + dest->i_atime = src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; + dest->i_blkbits = src->i_blkbits; + dest->i_flags = src->i_flags; +#ifdef CONFIG_FS_POSIX_ACL + dest->i_acl = src->i_acl; +#endif +#ifdef CONFIG_SECURITY + dest->i_security = src->i_security; +#endif +} + +static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int mask) { int err; + struct inode tmp; struct inode *top = grab_top(SDCARDFS_I(inode)); - if (!top) + if (!top) { + release_top(SDCARDFS_I(inode)); + WARN(1, "Top value was null!\n"); return -EINVAL; - /* Ensure owner is up to date */ - if (!uid_eq(inode->i_uid, top->i_uid)) { - SDCARDFS_I(inode)->d_uid = SDCARDFS_I(top)->d_uid; - fix_derived_permission(inode); } - release_top(SDCARDFS_I(inode)); /* * Permission check on sdcardfs inode. * Calling process should have AID_SDCARD_RW permission + * Since generic_permission only needs i_mode, i_uid, + * i_gid, and i_sb, we can create a fake inode to pass + * this information down in. + * + * The underlying code may attempt to take locks in some + * cases for features we're not using, but if that changes, + * locks must be dealt with to avoid undefined behavior. */ - err = generic_permission(inode, mask); - + copy_attrs(&tmp, inode); + tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); + tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); + release_top(SDCARDFS_I(inode)); + tmp.i_sb = inode->i_sb; + if (IS_POSIXACL(inode)) + printk(KERN_WARNING "%s: This may be undefined behavior... \n", __func__); + err = generic_permission(&tmp, mask); /* XXX * Original sdcardfs code calls inode_permission(lower_inode,.. ) * for checking inode permission. But doing such things here seems @@ -649,7 +686,13 @@ static int sdcardfs_permission(struct inode *inode, int mask) } -static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) +static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia) +{ + WARN(1, "sdcardfs does not support setattr. User setattr2.\n"); + return -EINVAL; +} + +static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct iattr *ia) { int err; struct dentry *lower_dentry; @@ -659,17 +702,45 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) struct path lower_path; struct iattr lower_ia; struct dentry *parent; + struct inode tmp; + struct inode *top; + const struct cred *saved_cred = NULL; inode = d_inode(dentry); + top = grab_top(SDCARDFS_I(inode)); + + if (!top) { + release_top(SDCARDFS_I(inode)); + return -EINVAL; + } + + /* + * Permission check on sdcardfs inode. + * Calling process should have AID_SDCARD_RW permission + * Since generic_permission only needs i_mode, i_uid, + * i_gid, and i_sb, we can create a fake inode to pass + * this information down in. + * + * The underlying code may attempt to take locks in some + * cases for features we're not using, but if that changes, + * locks must be dealt with to avoid undefined behavior. + * + */ + copy_attrs(&tmp, inode); + tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); + tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); + tmp.i_size = i_size_read(inode); + release_top(SDCARDFS_I(inode)); + tmp.i_sb = inode->i_sb; /* * Check if user has permission to change inode. We don't check if * this user can change the lower inode: that should happen when * calling notify_change on the lower inode. */ - err = inode_change_ok(inode, ia); + err = inode_change_ok(&tmp, ia); - /* no vfs_XXX operations required, cred overriding will be skipped. wj*/ if (!err) { /* check the Android group ID */ parent = dget_parent(dentry); @@ -685,6 +756,9 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) if (err) goto out_err; + /* save current_cred and override it */ + OVERRIDE_CRED(SDCARDFS_SB(dentry->d_sb), saved_cred); + sdcardfs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; lower_mnt = lower_path.mnt; @@ -708,7 +782,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) if (current->mm) down_write(¤t->mm->mmap_sem); if (ia->ia_valid & ATTR_SIZE) { - err = inode_newsize_ok(inode, ia->ia_size); + err = inode_newsize_ok(&tmp, ia->ia_size); if (err) { if (current->mm) up_write(¤t->mm->mmap_sem); @@ -750,11 +824,12 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) out: sdcardfs_put_lower_path(dentry, &lower_path); + REVERT_CRED(saved_cred); out_err: return err; } -static int sdcardfs_fillattr(struct inode *inode, struct kstat *stat) +static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, struct kstat *stat) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); struct inode *top = grab_top(info); @@ -763,10 +838,10 @@ static int sdcardfs_fillattr(struct inode *inode, struct kstat *stat) stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; - stat->mode = (inode->i_mode & S_IFMT) | get_mode(SDCARDFS_I(top)); + stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); stat->nlink = inode->i_nlink; stat->uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); - stat->gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(top))); + stat->gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; @@ -807,14 +882,14 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, sdcardfs_copy_and_fix_attrs(inode, lower_inode); fsstack_copy_inode_size(inode, lower_inode); - err = sdcardfs_fillattr(inode, stat); + err = sdcardfs_fillattr(mnt, inode, stat); sdcardfs_put_lower_path(dentry, &lower_path); return err; } const struct inode_operations sdcardfs_symlink_iops = { - .permission = sdcardfs_permission, - .setattr = sdcardfs_setattr, + .permission2 = sdcardfs_permission, + .setattr2 = sdcardfs_setattr, /* XXX Following operations are implemented, * but FUSE(sdcard) or FAT does not support them * These methods are *NOT* perfectly tested. @@ -827,12 +902,14 @@ const struct inode_operations sdcardfs_symlink_iops = { const struct inode_operations sdcardfs_dir_iops = { .create = sdcardfs_create, .lookup = sdcardfs_lookup, - .permission = sdcardfs_permission, + .permission = sdcardfs_permission_wrn, + .permission2 = sdcardfs_permission, .unlink = sdcardfs_unlink, .mkdir = sdcardfs_mkdir, .rmdir = sdcardfs_rmdir, .rename = sdcardfs_rename, - .setattr = sdcardfs_setattr, + .setattr = sdcardfs_setattr_wrn, + .setattr2 = sdcardfs_setattr, .getattr = sdcardfs_getattr, /* XXX Following operations are implemented, * but FUSE(sdcard) or FAT does not support them @@ -844,7 +921,9 @@ const struct inode_operations sdcardfs_dir_iops = { }; const struct inode_operations sdcardfs_main_iops = { - .permission = sdcardfs_permission, - .setattr = sdcardfs_setattr, + .permission = sdcardfs_permission_wrn, + .permission2 = sdcardfs_permission, + .setattr = sdcardfs_setattr_wrn, + .setattr2 = sdcardfs_setattr, .getattr = sdcardfs_getattr, }; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index c74a7d1bc18e..00a711ec2733 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -244,6 +244,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, if (err == -ENOENT) { struct dentry *child; struct dentry *match = NULL; + mutex_lock(&d_inode(lower_dir_dentry)->i_mutex); spin_lock(&lower_dir_dentry->d_lock); list_for_each_entry(child, &lower_dir_dentry->d_subdirs, d_child) { if (child && d_inode(child)) { @@ -254,6 +255,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, } } spin_unlock(&lower_dir_dentry->d_lock); + mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex); if (match) { err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, @@ -389,7 +391,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, sdcardfs_lower_inode(dentry->d_inode)); /* get derived permission */ get_derived_permission(parent, dentry); - fix_derived_permission(dentry->d_inode); + fixup_tmp_permissions(d_inode(dentry)); } /* update parent directory's atime */ fsstack_copy_attr_atime(parent->d_inode, diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 5400e7e63d27..eec10ccacd99 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -28,7 +28,6 @@ enum { Opt_fsgid, Opt_gid, Opt_debug, - Opt_lower_fs, Opt_mask, Opt_multiuser, // May need? Opt_userid, @@ -60,11 +59,9 @@ static int parse_options(struct super_block *sb, char *options, int silent, opts->fs_low_uid = AID_MEDIA_RW; opts->fs_low_gid = AID_MEDIA_RW; vfsopts->mask = 0; - opts->mask = 0; opts->multiuser = false; opts->fs_user_id = 0; vfsopts->gid = 0; - opts->gid = 0; /* by default, 0MB is reserved */ opts->reserved_mb = 0; @@ -97,7 +94,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_gid: if (match_int(&args[0], &option)) return 0; - opts->gid = option; vfsopts->gid = option; break; case Opt_userid: @@ -108,7 +104,6 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_mask: if (match_int(&args[0], &option)) return 0; - opts->mask = option; vfsopts->mask = option; break; case Opt_multiuser: @@ -258,6 +253,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, printk(KERN_INFO "sdcardfs: dev_name -> %s\n", dev_name); printk(KERN_INFO "sdcardfs: options -> %s\n", (char *)raw_data); + printk(KERN_INFO "sdcardfs: mnt -> %p\n", mnt); /* parse lower path */ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, @@ -342,7 +338,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, setup_derived_state(d_inode(sb->s_root), PERM_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } - fix_derived_permission(sb->s_root->d_inode); + fixup_tmp_permissions(d_inode(sb->s_root)); sb_info->sb = sb; list_add(&sb_info->list, &sdcardfs_super_list); mutex_unlock(&sdcardfs_super_list_lock); diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 22ef29857022..b03130329014 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -68,14 +68,20 @@ #define AID_PACKAGE_INFO 1027 -#define fix_derived_permission(x) \ + +/* + * Permissions are handled by our permission function. + * We don't want anyone who happens to look at our inode value to prematurely + * block access, so store more permissive values. These are probably never + * used. + */ +#define fixup_tmp_permissions(x) \ do { \ (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \ - (x)->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(x))); \ - (x)->i_mode = ((x)->i_mode & S_IFMT) | get_mode(SDCARDFS_I(x));\ + (x)->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); \ + (x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\ } while (0) - /* OVERRIDE_CRED() and REVERT_CRED() * OVERRID_CRED() * backup original task->cred @@ -187,8 +193,6 @@ struct sdcardfs_mount_options { uid_t fs_low_uid; gid_t fs_low_gid; userid_t fs_user_id; - gid_t gid; - mode_t mask; bool multiuser; unsigned int reserved_mb; }; @@ -360,9 +364,10 @@ static inline void release_top(struct sdcardfs_inode_info *info) iput(info->top); } -static inline int get_gid(struct sdcardfs_inode_info *info) { - struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); - if (sb_info->options.gid == AID_SDCARD_RW) { +static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info) { + struct sdcardfs_vfsmount_options *opts = mnt->data; + + if (opts->gid == AID_SDCARD_RW) { /* As an optimization, certain trusted system components only run * as owner but operate across all users. Since we're now handing * out the sdcard_rw GID only to trusted apps, we're okay relaxing @@ -370,14 +375,15 @@ static inline int get_gid(struct sdcardfs_inode_info *info) { * assigned to app directories are still multiuser aware. */ return AID_SDCARD_RW; } else { - return multiuser_get_uid(info->userid, sb_info->options.gid); + return multiuser_get_uid(info->userid, opts->gid); } } -static inline int get_mode(struct sdcardfs_inode_info *info) { +static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info) { int owner_mode; int filtered_mode; - struct sdcardfs_sb_info * sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); - int visible_mode = 0775 & ~sb_info->options.mask; + struct sdcardfs_vfsmount_options *opts = mnt->data; + int visible_mode = 0775 & ~opts->mask; + if (info->perm == PERM_PRE_ROOT) { /* Top of multi-user view should always be visible to ensure @@ -387,7 +393,7 @@ static inline int get_mode(struct sdcardfs_inode_info *info) { /* Block "other" access to Android directories, since only apps * belonging to a specific user should be in there; we still * leave +x open for the default view. */ - if (sb_info->options.gid == AID_SDCARD_RW) { + if (opts->gid == AID_SDCARD_RW) { visible_mode = visible_mode & ~0006; } else { visible_mode = visible_mode & ~0007; @@ -553,12 +559,16 @@ static inline int check_min_free_space(struct dentry *dentry, size_t size, int d return 1; } -/* Copies attrs and maintains sdcardfs managed attrs */ +/* + * Copies attrs and maintains sdcardfs managed attrs + * Since our permission check handles all special permissions, set those to be open + */ static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src) { - dest->i_mode = (src->i_mode & S_IFMT) | get_mode(SDCARDFS_I(dest)); + dest->i_mode = (src->i_mode & S_IFMT) | S_IRWXU | S_IRWXG | + S_IROTH | S_IXOTH; /* 0775 */ dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid); - dest->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(dest))); + dest->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); dest->i_rdev = src->i_rdev; dest->i_atime = src->i_atime; dest->i_mtime = src->i_mtime; -- GitLab From 1503e16e4832011c100e4269c1e19f99a706c4b0 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 15 Nov 2016 13:35:18 -0800 Subject: [PATCH 1224/1442] ANDROID: sdcardfs: Change magic value Sdcardfs uses the same magic value as wrapfs. This should not be the case. As it is entirely in memory, the value can be changed without any loss of compatibility. Change-Id: I24200b805d5e6d32702638be99e47d50d7f2f746 Signed-off-by: Daniel Rosenberg --- include/uapi/linux/magic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 270b764f880e..bd017699420e 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -53,7 +53,7 @@ #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" -#define SDCARDFS_SUPER_MAGIC 0xb550ca10 +#define SDCARDFS_SUPER_MAGIC 0x5dca2df5 #define SMB_SUPER_MAGIC 0x517B #define CGROUP_SUPER_MAGIC 0x27e0eb -- GitLab From 63d2076e047ed9da56a08e4a462e843bd5562647 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 1 Dec 2016 14:36:29 -0800 Subject: [PATCH 1225/1442] ANDROID: sdcardfs: Switch ->d_inode to d_inode() Change-Id: I12375cc2d6e82fb8adf0319be971f335f8d7a312 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/derived_perm.c | 16 ++++++++-------- fs/sdcardfs/file.c | 2 +- fs/sdcardfs/lookup.c | 14 +++++++------- fs/sdcardfs/main.c | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 066edbbb6ad6..60ae94bb99f7 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -49,8 +49,8 @@ void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, /* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry) { - struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry)); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); appid_t appid; /* By default, each inode inherits from its parent. @@ -61,7 +61,7 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, st * stage of each system call by fix_derived_permission(inode). */ - inherit_derived_state(parent->d_inode, dentry->d_inode); + inherit_derived_state(d_inode(parent), d_inode(dentry)); /* Derive custom permissions based on parent and current node */ switch (parent_info->perm) { @@ -134,7 +134,7 @@ void fixup_perms_recursive(struct dentry *dentry, const char* name, size_t len) struct sdcardfs_inode_info *info; if (!dget(dentry)) return; - if (!dentry->d_inode) { + if (!d_inode(dentry)) { dput(dentry); return; } @@ -195,7 +195,7 @@ inline void update_derived_permission_lock(struct dentry *dentry) { struct dentry *parent; - if(!dentry || !dentry->d_inode) { + if(!dentry || !d_inode(dentry)) { printk(KERN_ERR "sdcardfs: %s: invalid dentry\n", __func__); return; } @@ -204,7 +204,7 @@ inline void update_derived_permission_lock(struct dentry *dentry) * 2. remove the root dentry update */ if(IS_ROOT(dentry)) { - //setup_default_pre_root_state(dentry->d_inode); + //setup_default_pre_root_state(d_inode(dentry)); } else { parent = dget_parent(dentry); if(parent) { @@ -219,7 +219,7 @@ int need_graft_path(struct dentry *dentry) { int ret = 0; struct dentry *parent = dget_parent(dentry); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); if(parent_info->perm == PERM_ANDROID && @@ -278,7 +278,7 @@ int is_base_obbpath(struct dentry *dentry) { int ret = 0; struct dentry *parent = dget_parent(dentry); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); + struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); spin_lock(&SDCARDFS_D(dentry)->lock); diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index c249fa982d3c..7750a0472389 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -216,7 +216,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) goto out_err; } - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", __func__, dentry->d_name.name, current->comm); diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 00a711ec2733..e94a65c8bbbd 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -179,7 +179,7 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, struct inode *lower_inode; struct super_block *lower_sb; - lower_inode = lower_path->dentry->d_inode; + lower_inode = d_inode(lower_path->dentry); lower_sb = sdcardfs_lower_super(sb); /* check that the lower file system didn't cross a mount point */ @@ -359,7 +359,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, parent = dget_parent(dentry); - if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { + if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { ret = ERR_PTR(-EACCES); printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" " dentry: %s, task:%s\n", @@ -386,16 +386,16 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, } if (ret) dentry = ret; - if (dentry->d_inode) { - fsstack_copy_attr_times(dentry->d_inode, - sdcardfs_lower_inode(dentry->d_inode)); + if (d_inode(dentry)) { + fsstack_copy_attr_times(d_inode(dentry), + sdcardfs_lower_inode(d_inode(dentry))); /* get derived permission */ get_derived_permission(parent, dentry); fixup_tmp_permissions(d_inode(dentry)); } /* update parent directory's atime */ - fsstack_copy_attr_atime(parent->d_inode, - sdcardfs_lower_inode(parent->d_inode)); + fsstack_copy_attr_atime(d_inode(parent), + sdcardfs_lower_inode(d_inode(parent))); out: sdcardfs_put_lower_path(parent, &lower_parent_path); diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index eec10ccacd99..7a8eae29e44d 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -297,7 +297,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, sb->s_op = &sdcardfs_sops; /* get a new inode and allocate our root dentry */ - inode = sdcardfs_iget(sb, lower_path.dentry->d_inode, 0); + inode = sdcardfs_iget(sb, d_inode(lower_path.dentry), 0); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_sput; -- GitLab From 3adfc032510c566cbb77409061bbbb789d52cd1e Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Tue, 27 Dec 2016 12:36:29 -0800 Subject: [PATCH 1226/1442] ANDROID: sdcardfs: Fix locking issue with permision fix up Don't use lookup_one_len so we can grab the spinlock that protects d_subdirs. Bug: 30954918 Change-Id: I0c6a393252db7beb467e0d563739a3a14e1b5115 Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/derived_perm.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 60ae94bb99f7..9408a5477ada 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -141,32 +141,26 @@ void fixup_perms_recursive(struct dentry *dentry, const char* name, size_t len) info = SDCARDFS_I(d_inode(dentry)); if (needs_fixup(info->perm)) { - /* We need permission to fix up these values. - * Since permissions are based of of the mount, and - * we are accessing without the mount point, we create - * a fake mount with the permissions we will be using. - */ - struct vfsmount fakemnt; - struct sdcardfs_vfsmount_options opts; - fakemnt.data = &opts; - opts.gid = AID_SDCARD_RW; - opts.mask = 0; - mutex_lock(&d_inode(dentry)->i_mutex); - child = lookup_one_len2(name, &fakemnt, dentry, len); - mutex_unlock(&d_inode(dentry)->i_mutex); - if (!IS_ERR(child)) { - if (d_inode(child)) { - get_derived_permission(dentry, child); - fixup_tmp_permissions(d_inode(child)); - } - dput(child); + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + dget(child); + if (!strncasecmp(child->d_name.name, name, len)) { + if (child->d_inode) { + get_derived_permission(dentry, child); + fixup_tmp_permissions(child->d_inode); + dput(child); + break; + } + } + dput(child); } + spin_unlock(&dentry->d_lock); } else if (descendant_may_need_fixup(info->perm)) { - mutex_lock(&d_inode(dentry)->i_mutex); + spin_lock(&dentry->d_lock); list_for_each_entry(child, &dentry->d_subdirs, d_child) { fixup_perms_recursive(child, name, len); } - mutex_unlock(&d_inode(dentry)->i_mutex); + spin_unlock(&dentry->d_lock); } dput(dentry); } -- GitLab From 1eea0df0513dac841da06a70ec4e58900dc69e98 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 5 Jan 2017 14:37:11 -0800 Subject: [PATCH 1227/1442] ANDROID: mnt: remount should propagate to slaves of slaves propagate_remount was not accounting for the slave mounts of other slave mounts, leading to some namespaces not recieving the remount information. bug:33731928 Change-Id: Idc9e8c2ed126a4143229fc23f10a959c2d0a3854 Signed-off-by: Daniel Rosenberg --- fs/pnode.c | 27 +++++++++++++++++++++------ fs/pnode.h | 2 +- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/pnode.c b/fs/pnode.c index 867a6cc2e3b8..83b5bb1fdbb8 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -459,16 +459,31 @@ int propagate_umount(struct list_head *list) return 0; } -int propagate_remount(struct mount *mnt) { - struct mount *m; +/* + * Iterates over all slaves, and slaves of slaves. + */ +static struct mount *next_descendent(struct mount *root, struct mount *cur) +{ + if (!IS_MNT_NEW(cur) && !list_empty(&cur->mnt_slave_list)) + return first_slave(cur); + do { + if (cur->mnt_slave.next != &cur->mnt_master->mnt_slave_list) + return next_slave(cur); + cur = cur->mnt_master; + } while (cur != root); + return NULL; +} + +void propagate_remount(struct mount *mnt) +{ + struct mount *m = mnt; struct super_block *sb = mnt->mnt.mnt_sb; - int ret = 0; if (sb->s_op->copy_mnt_data) { - for (m = first_slave(mnt); m->mnt_slave.next != &mnt->mnt_slave_list; m = next_slave(m)) { + m = next_descendent(mnt, m); + while (m) { sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data); + m = next_descendent(mnt, m); } } - - return ret; } diff --git a/fs/pnode.h b/fs/pnode.h index 2c2736646810..03a8001c4af2 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -44,7 +44,7 @@ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, int propagate_umount(struct list_head *); int propagate_mount_busy(struct mount *, int); void propagate_mount_unlock(struct mount *); -int propagate_remount(struct mount *); +void propagate_remount(struct mount *); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); unsigned int mnt_get_count(struct mount *mnt); -- GitLab From b47e110e65b057973af530735f28bc1f3794cba7 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Wed, 1 Jun 2016 21:53:20 +0530 Subject: [PATCH 1228/1442] ANDROID: sdcardfs: use wrappers to access i_mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use inode_{lock,unlock,lock_nested} wrappers as suggested by upstream commit 5955102c9984 (wrappers for ->i_mutex access) for access to ->i_mutex, otherwise we run into following build error: CC [M] fs/sdcardfs/dentry.o In file included from fs/sdcardfs/dentry.c:21:0: fs/sdcardfs/sdcardfs.h: In function ‘lock_parent’: fs/sdcardfs/sdcardfs.h:422:33: error: ‘struct inode’ has no member named ‘i_mutex’ mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); ^ fs/sdcardfs/sdcardfs.h: In function ‘unlock_dir’: fs/sdcardfs/sdcardfs.h:428:28: error: ‘struct inode’ has no member named ‘i_mutex’ mutex_unlock(&d_inode(dir)->i_mutex); ^ In file included from ./include/linux/fs.h:19:0, from fs/sdcardfs/sdcardfs.h:31, from fs/sdcardfs/dentry.c:21: fs/sdcardfs/sdcardfs.h: In function ‘prepare_dir’: fs/sdcardfs/sdcardfs.h:457:27: error: ‘struct inode’ has no member named ‘i_mutex’ mutex_lock(&d_inode(dent)->i_mutex); ^ ./include/linux/mutex.h:146:44: note: in definition of macro ‘mutex_lock’ #define mutex_lock(lock) mutex_lock_nested(lock, 0) ^ In file included from fs/sdcardfs/dentry.c:21:0: fs/sdcardfs/sdcardfs.h:459:29: error: ‘struct inode’ has no member named‘i_mutex’ mutex_unlock(&d_inode(dent)->i_mutex); ^ fs/sdcardfs/sdcardfs.h:466:38: error: ‘struct inode’ has no member named ‘i_mutex’ mutex_unlock(&d_inode(parent.dentry)->i_mutex); ^ Change-Id: I4c8298045ac511aba5542d9ca967331f550376a5 Signed-off-by: Amit Pundir --- fs/sdcardfs/inode.c | 4 ++-- fs/sdcardfs/lookup.c | 6 +++--- fs/sdcardfs/sdcardfs.h | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 76a6e8ad0736..17af0c18dc79 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -804,10 +804,10 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct * unlinked (no inode->i_sb and i_ino==0. This happens if someone * tries to open(), unlink(), then ftruncate() a file. */ - mutex_lock(&d_inode(lower_dentry)->i_mutex); + inode_lock(d_inode(lower_dentry)); err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */ NULL); - mutex_unlock(&d_inode(lower_dentry)->i_mutex); + inode_unlock(d_inode(lower_dentry)); if (current->mm) up_write(¤t->mm->mmap_sem); if (err) diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index e94a65c8bbbd..d9d46308ef94 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -244,7 +244,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, if (err == -ENOENT) { struct dentry *child; struct dentry *match = NULL; - mutex_lock(&d_inode(lower_dir_dentry)->i_mutex); + inode_lock(d_inode(lower_dir_dentry)); spin_lock(&lower_dir_dentry->d_lock); list_for_each_entry(child, &lower_dir_dentry->d_subdirs, d_child) { if (child && d_inode(child)) { @@ -255,7 +255,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, } } spin_unlock(&lower_dir_dentry->d_lock); - mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex); + inode_unlock(d_inode(lower_dir_dentry)); if (match) { err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, @@ -344,7 +344,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, * On fail (== error) * returns error ptr * - * @dir : Parent inode. It is locked (dir->i_mutex) + * @dir : Parent inode. * @dentry : Target dentry to lookup. we should set each of fields. * (dentry->d_name is initialized already) * @nd : nameidata of parent inode diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index b03130329014..66a97ef8d261 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -465,13 +465,13 @@ extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path); static inline struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir = dget_parent(dentry); - mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); return dir; } static inline void unlock_dir(struct dentry *dir) { - mutex_unlock(&d_inode(dir)->i_mutex); + inode_unlock(d_inode(dir)); dput(dir); } @@ -500,16 +500,16 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m attrs.ia_uid = make_kuid(&init_user_ns, uid); attrs.ia_gid = make_kgid(&init_user_ns, gid); attrs.ia_valid = ATTR_UID | ATTR_GID; - mutex_lock(&d_inode(dent)->i_mutex); + inode_lock(d_inode(dent)); notify_change2(parent.mnt, dent, &attrs, NULL); - mutex_unlock(&d_inode(dent)->i_mutex); + inode_unlock(d_inode(dent)); out_dput: dput(dent); out_unlock: /* parent dentry locked by lookup_create */ - mutex_unlock(&d_inode(parent.dentry)->i_mutex); + inode_unlock(d_inode(parent.dentry)); path_put(&parent); return err; } -- GitLab From 48bc6d3a94db0cfc621687c055641f35e5da7929 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 4 Aug 2016 21:04:31 +0530 Subject: [PATCH 1229/1442] ANDROID: sdcardfs: add parent pointer into dentry name hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following sdcardfs compilation error introduced in code refactoring by upstream commit 8387ff2577eb ("vfs: make the string hashes salt the hash"). CC [M] fs/sdcardfs/dentry.o In file included from ./include/linux/dcache.h:13:0, from fs/sdcardfs/sdcardfs.h:29, from fs/sdcardfs/dentry.c:21: fs/sdcardfs/dentry.c: In function ‘sdcardfs_hash_ci’: ./include/linux/stringhash.h:38:51: error: expected expression before ‘)’ token #define init_name_hash(salt) (unsigned long)(salt) ^ fs/sdcardfs/dentry.c:138:9: note: in expansion of macro ‘init_name_hash’ hash = init_name_hash(); ^ Change-Id: I9feb6c075a7e953726954f5746fc009202d3121c Signed-off-by: Amit Pundir --- fs/sdcardfs/dentry.c | 2 +- fs/sdcardfs/lookup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 971928ab6c21..e4156a6fa8c8 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -135,7 +135,7 @@ static int sdcardfs_hash_ci(const struct dentry *dentry, //len = vfat_striptail_len(qstr); len = qstr->len; - hash = init_name_hash(); + hash = init_name_hash(dentry); while (len--) //hash = partial_name_hash(nls_tolower(t, *name++), hash); hash = partial_name_hash(tolower(*name++), hash); diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index d9d46308ef94..d271617290ee 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -309,7 +309,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, /* instatiate a new negative dentry */ this.name = name; this.len = strlen(name); - this.hash = full_name_hash(this.name, this.len); + this.hash = full_name_hash(dentry, this.name, this.len); lower_dentry = d_lookup(lower_dir_dentry, &this); if (lower_dentry) goto setup_lower; -- GitLab From c9bae39a9b9cd4b5ed50d56bb81a664cfbaeb596 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 8 Aug 2016 12:27:33 +0530 Subject: [PATCH 1230/1442] ANDROID: sdcardfs: get rid of 'parent' argument of ->d_compare() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ->d_compare() doesn't get parent as a separate argument anymore according to upstream commit 6fa67e707559 ("get rid of 'parent' argument of ->d_compare()"). We run into following build error otherwise: CC [M] fs/sdcardfs/dentry.o fs/sdcardfs/dentry.c:183:15: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] .d_compare = sdcardfs_cmp_ci, ^ fs/sdcardfs/dentry.c:183:15: note: (near initialization for ‘sdcardfs_ci_dops.d_compare’) Change-Id: I51801b57aeb8287f1e69ce6cb944e8722ff37bea Signed-off-by: Amit Pundir --- fs/sdcardfs/dentry.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index e4156a6fa8c8..f22de8add10c 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -147,8 +147,7 @@ static int sdcardfs_hash_ci(const struct dentry *dentry, /* * Case insensitive compare of two vfat names. */ -static int sdcardfs_cmp_ci(const struct dentry *parent, - const struct dentry *dentry, +static int sdcardfs_cmp_ci(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { /* This function is copy of vfat_cmpi */ -- GitLab From 82a2800b30f3f0ba9ebd361e0e3375f3dbba38bc Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 11 Oct 2016 13:26:17 +0530 Subject: [PATCH 1231/1442] ANDROID: sdcardfs: Propagate dentry down to inode_change_ok() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 31051c85b5e2 ("fs: Give dentry to inode_change_ok() instead of inode"), to avoid clearing of capabilities or security related extended attributes too early, inode_change_ok() will need to take dentry instead of inode. Propagate it down to sdcardfs_setattr() and also rename it to setattr_prepare(), otherwise we run into following build error: CC [M] fs/sdcardfs/inode.o fs/sdcardfs/inode.c: In function ‘sdcardfs_setattr’: fs/sdcardfs/inode.c:644:8: error: implicit declaration of function ‘inode_change_ok’ [-Werror=implicit-function-declaration] err = inode_change_ok(inode, ia); ^ Change-Id: I714b4f4f68b7fea1ac82a71d2f323c76b11fa008 Signed-off-by: Amit Pundir --- fs/sdcardfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 17af0c18dc79..947918c2177e 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -703,6 +703,7 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct struct iattr lower_ia; struct dentry *parent; struct inode tmp; + struct dentry tmp_d; struct inode *top; const struct cred *saved_cred = NULL; @@ -733,13 +734,14 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct tmp.i_size = i_size_read(inode); release_top(SDCARDFS_I(inode)); tmp.i_sb = inode->i_sb; + tmp_d.d_inode = &tmp; /* - * Check if user has permission to change inode. We don't check if + * Check if user has permission to change dentry. We don't check if * this user can change the lower inode: that should happen when * calling notify_change on the lower inode. */ - err = inode_change_ok(&tmp, ia); + err = setattr_prepare(&tmp_d, ia); if (!err) { /* check the Android group ID */ -- GitLab From d4ae31ba989c48ba6dc96add3d6e1046daeafdc6 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Sun, 16 Oct 2016 15:24:15 +0530 Subject: [PATCH 1232/1442] ANDROID: sdcardfs: make it use new .rename i_op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 2773bf00aeb9 ("fs: rename "rename2" i_op to "rename""), syscall rename2 is merged with rename syscall and it broke sdcard_fs build and we get following build error: CC [M] fs/sdcardfs/inode.o fs/sdcardfs/inode.c:786:13: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] .rename = sdcardfs_rename, ^ fs/sdcardfs/inode.c:786:13: note: (near initialization for ‘sdcardfs_dir_iops.rename’) renameat2 syscall is the same as renameat with an added flags argument and calling renameat2 with flags=0 is equivalent to calling renameat. Change-Id: I48f3c76c3af481241188253a76f310670de6bd18 Signed-off-by: Amit Pundir --- fs/sdcardfs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 947918c2177e..5b311708195b 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -457,7 +457,8 @@ static int sdcardfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode * superblock-level name-space lock for renames and copy-ups. */ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) { int err = 0; struct dentry *lower_old_dentry = NULL; @@ -470,6 +471,9 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct path lower_old_path, lower_new_path; const struct cred *saved_cred = NULL; + if (flags) + return -EINVAL; + if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name) || !check_caller_access_to_name(new_dir, new_dentry->d_name.name)) { printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" -- GitLab From 71f1b5947180935f51766c7bf3e190406028758f Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 7 Jun 2016 16:30:56 +0530 Subject: [PATCH 1233/1442] ANDROID: sdcardfs: eliminate the offset argument to ->direct_IO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminate the offset argument to sdcardfs_direct_IO() which is dropped by upstream commit c8b8e32d700f ("direct-io: eliminate the offset argument to ->direct_IO"), otherwise we run into following build error: CC [M] fs/sdcardfs/mmap.o fs/sdcardfs/mmap.c:76:15: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] .direct_IO = sdcardfs_direct_IO, ^ fs/sdcardfs/mmap.c:76:15: note: (near initialization for ‘sdcardfs_aops.direct_IO’) Change-Id: I292d93bb16365a9fa46494accb2b5da51028b5c1 Signed-off-by: Amit Pundir --- fs/sdcardfs/mmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c index e21f64675a80..ac5f3deae088 100644 --- a/fs/sdcardfs/mmap.c +++ b/fs/sdcardfs/mmap.c @@ -48,8 +48,7 @@ static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return err; } -static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, - struct iov_iter *iter, loff_t pos) +static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { /* * This function returns zero on purpose in order to support direct IO. -- GitLab From a9a8266a899a454c4357f44885fbeaba0aa8b501 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 30 Jan 2017 13:55:23 +0530 Subject: [PATCH 1234/1442] ANDROID: cpufreq: conservative: fix duplicate 'static' error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following duplicate 'static' declaration error: drivers/cpufreq/cpufreq_conservative.c:308:1: error: duplicate ‘static’ static struct dbs_governor cs_governor = { ^ scripts/Makefile.build:293: recipe for target 'drivers/cpufreq/cpufreq_conservative.o' failed Fixes: ("ANDROID: [CPUFREQ] Don't export governors for default governor") Signed-off-by: Amit Pundir --- drivers/cpufreq/cpufreq_conservative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index afa0364196e3..e8e16a5dbd1e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -305,7 +305,7 @@ static void cs_start(struct cpufreq_policy *policy) #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE static #endif -static struct dbs_governor cs_governor = { +struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_attrs = cs_attributes }, .gov_dbs_timer = cs_dbs_timer, -- GitLab From c33cc8f2987a31f57457458847f518920faf7bb7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 17 May 2016 14:41:22 +0530 Subject: [PATCH 1235/1442] FROMLIST: cpufreq: Add android's 'interactive' governor https://lkml.org/lkml/2016/9/14/208 Interactive governor has lived in Android sources for a very long time and this commit is based on the code present in following branch: https://android.googlesource.com/kernel/common android-4.4 The Interactive governor is designed for latency-sensitive workloads, such as interactive user interfaces like the mobile phones and tablets. The interactive governor aims to be significantly more responsive to ramp CPU quickly up when CPU-intensive activity begins. Existing governors sample CPU load at a particular rate, typically every X ms and then update the frequency from a work-handler. This can lead to under-powering UI threads for the period of time during which the user begins interacting with a previously-idle system until the next sample period happens. The 'interactive' governor uses a different approach. A real-time thread is used for scaling up, giving the remaining tasks the CPU performance benefit, unlike existing governors which are more likely to schedule ramp-up work to occur after your performance starved tasks have completed. The Android version of interactive governor also checks whether to scale the CPU frequency up soon after coming out of idle. When the CPU comes out of idle, the governor check if the CPU sampling is overdue or not. If yes, it immediately starts the sampling. Otherwise, the utilization hooks from the scheduler handle the sampling later. If the CPU is very busy from exiting idle to when the evaluation happens, then it assumes that the CPU is under-powered and ramps it to MAX speed. If the CPU was not sufficiently busy to immediately ramp to MAX speed, then the governor evaluates the CPU load since the last speed adjustment, choosing the highest value between that longer-term load or the short-term load since idle exit to determine the CPU speed to ramp to. Idle notifiers will be be handled later and are not included for now. The core of this code is written and maintained (in Android repositories) by Mike Chan and Todd Poyner over a long period of time. Vireshk has made changes to to the governor to align it with the current practices followed with mainline governors, like using utilization hooks from the scheduler and handling kobject (for governor's sysfs directory) in a race free manner. And of course this included general cleanup of the governor as well. Signed-off-by: Mike Chan Signed-off-by: Todd Poynor Signed-off-by: Viresh Kumar Change-Id: Ib5e8d1dab0fa3cc5ba79b7a554c8dde35435cbdb [AmitP: Cherry-picked this version from https://git.kernel.org/cgit/linux/kernel/git/vireshk/pm.git/log/?h=cpufreq/interactive-idle-notifier. Also refactored and folded https://lkml.org/lkml/2016/9/14/209 patch into this unified patch.] Signed-off-by: Amit Pundir --- Documentation/cpu-freq/governors.txt | 86 ++ drivers/cpufreq/Kconfig | 30 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/cpufreq_interactive.c | 1371 ++++++++++++++++++++ include/trace/events/cpufreq_interactive.h | 112 ++ 5 files changed, 1600 insertions(+) create mode 100644 drivers/cpufreq/cpufreq_interactive.c create mode 100644 include/trace/events/cpufreq_interactive.h diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index c15aa75f5227..0cf9a6bff6a5 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -28,6 +28,7 @@ Contents: 2.3 Userspace 2.4 Ondemand 2.5 Conservative +2.6 Interactive 3. The Governor Interface in the CPUfreq Core @@ -218,6 +219,91 @@ a decision on when to decrease the frequency while running in any speed. Load for frequency increase is still evaluated every sampling rate. +2.6 Interactive +--------------- + +The CPUfreq governor "interactive" is designed for latency-sensitive, +interactive workloads. This governor sets the CPU speed depending on +usage, similar to "ondemand" and "conservative" governors, but with a +different set of configurable behaviors. + +The tunable values for this governor are: + +above_hispeed_delay: When speed is at or above hispeed_freq, wait for +this long before raising speed in response to continued high load. +The format is a single delay value, optionally followed by pairs of +CPU speeds and the delay to use at or above those speeds. Colons can +be used between the speeds and associated delays for readability. For +example: + + 80000 1300000:200000 1500000:40000 + +uses delay 80000 uS until CPU speed 1.3 GHz, at which speed delay +200000 uS is used until speed 1.5 GHz, at which speed (and above) +delay 40000 uS is used. If speeds are specified these must appear in +ascending order. Default is 20000 uS. + +boost: If non-zero, immediately boost speed of all CPUs to at least +hispeed_freq until zero is written to this attribute. If zero, allow +CPU speeds to drop below hispeed_freq according to load as usual. +Default is zero. + +boostpulse: On each write, immediately boost speed of all CPUs to +hispeed_freq for at least the period of time specified by +boostpulse_duration, after which speeds are allowed to drop below +hispeed_freq according to load as usual. Its a write-only file. + +boostpulse_duration: Length of time to hold CPU speed at hispeed_freq +on a write to boostpulse, before allowing speed to drop according to +load as usual. Default is 80000 uS. + +go_hispeed_load: The CPU load at which to ramp to hispeed_freq. +Default is 99%. + +hispeed_freq: An intermediate "high speed" at which to initially ramp +when CPU load hits the value specified in go_hispeed_load. If load +stays high for the amount of time specified in above_hispeed_delay, +then speed may be bumped higher. Default is the maximum speed allowed +by the policy at governor initialization time. + +io_is_busy: If set, the governor accounts IO time as CPU busy time. + +min_sample_time: The minimum amount of time to spend at the current +frequency before ramping down. Default is 80000 uS. + +target_loads: CPU load values used to adjust speed to influence the +current CPU load toward that value. In general, the lower the target +load, the more often the governor will raise CPU speeds to bring load +below the target. The format is a single target load, optionally +followed by pairs of CPU speeds and CPU loads to target at or above +those speeds. Colons can be used between the speeds and associated +target loads for readability. For example: + + 85 1000000:90 1700000:99 + +targets CPU load 85% below speed 1GHz, 90% at or above 1GHz, until +1.7GHz and above, at which load 99% is targeted. If speeds are +specified these must appear in ascending order. Higher target load +values are typically specified for higher speeds, that is, target load +values also usually appear in an ascending order. The default is +target load 90% for all speeds. + +timer_rate: Sample rate for reevaluating CPU load when the CPU is not +idle. A deferrable timer is used, such that the CPU will not be woken +from idle to service this timer until something else needs to run. +(The maximum time to allow deferring this timer when not running at +minimum speed is configurable via timer_slack.) Default is 20000 uS. + +timer_slack: Maximum additional time to defer handling the governor +sampling timer beyond timer_rate when running at speeds above the +minimum. For platforms that consume additional power at idle when +CPUs are running at speeds greater than minimum, this places an upper +bound on how long the timer will be deferred prior to re-evaluating +load and dropping speed. For example, if timer_rate is 20000uS and +timer_slack is 10000uS then timers will be deferred for up to 30msec +when not at lowest speed. A value of -1 means defer timers +indefinitely at all speeds. Default is 80000 uS. + 3. The Governor Interface in the CPUfreq Core ============================================= diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index f98a90125aa5..17d7f54b4318 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -110,6 +110,16 @@ config CPU_FREQ_DEFAULT_GOV_SCHED cpu frequency using CPU utilization estimates from the scheduler. +config CPU_FREQ_DEFAULT_GOV_INTERACTIVE + bool "interactive" + select CPU_FREQ_GOV_INTERACTIVE + select CPU_FREQ_GOV_PERFORMANCE + help + Use the CPUFreq governor 'interactive' as default. This allows + you to get a full dynamic cpu frequency capable system by simply + loading your cpufreq low-level hardware driver, using the + 'interactive' governor for latency-sensitive workloads. + endchoice config CPU_FREQ_GOV_PERFORMANCE @@ -204,6 +214,26 @@ config CPU_FREQ_GOV_SCHED If in doubt, say N. +config CPU_FREQ_GOV_INTERACTIVE + tristate "'interactive' cpufreq policy governor" + depends on CPU_FREQ + select CPU_FREQ_GOV_ATTR_SET + select IRQ_WORK + help + 'interactive' - This driver adds a dynamic cpufreq policy governor + designed for latency-sensitive workloads. + + This governor attempts to reduce the latency of clock + increases so that the system is more responsive to + interactive workloads. + + To compile this driver as a module, choose M here: the + module will be called cpufreq_interactive. + + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. + comment "CPU frequency scaling drivers" config CPUFREQ_DT diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 0a9b6a093646..f0c9905d68a5 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o +obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c new file mode 100644 index 000000000000..2fa2b50630d9 --- /dev/null +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -0,0 +1,1371 @@ +/* + * drivers/cpufreq/cpufreq_interactive.c + * + * Copyright (C) 2010-2016 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Mike Chan (mike@android.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#define gov_attr_ro(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define gov_attr_wo(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0200, NULL, store_##_name) + +#define gov_attr_rw(_name) \ +static struct governor_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +/* Separate instance required for each 'interactive' directory in sysfs */ +struct interactive_tunables { + struct gov_attr_set attr_set; + + /* Hi speed to bump to from lo speed when load burst (default max) */ + unsigned int hispeed_freq; + + /* Go to hi speed when CPU load at or above this value. */ +#define DEFAULT_GO_HISPEED_LOAD 99 + unsigned long go_hispeed_load; + + /* Target load. Lower values result in higher CPU speeds. */ + spinlock_t target_loads_lock; + unsigned int *target_loads; + int ntarget_loads; + + /* + * The minimum amount of time to spend at a frequency before we can ramp + * down. + */ +#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC) + unsigned long min_sample_time; + + /* The sample rate of the timer used to increase frequency */ + unsigned long sampling_rate; + + /* + * Wait this long before raising speed above hispeed, by default a + * single timer interval. + */ + spinlock_t above_hispeed_delay_lock; + unsigned int *above_hispeed_delay; + int nabove_hispeed_delay; + + /* Non-zero means indefinite speed boost active */ + int boost; + /* Duration of a boot pulse in usecs */ + int boostpulse_duration; + /* End time of boost pulse in ktime converted to usecs */ + u64 boostpulse_endtime; + bool boosted; + + /* + * Max additional time to wait in idle, beyond sampling_rate, at speeds + * above minimum before wakeup to reduce speed, or -1 if unnecessary. + */ +#define DEFAULT_TIMER_SLACK (4 * DEFAULT_SAMPLING_RATE) + unsigned long timer_slack_delay; + unsigned long timer_slack; + bool io_is_busy; +}; + +/* Separate instance required for each 'struct cpufreq_policy' */ +struct interactive_policy { + struct cpufreq_policy *policy; + struct interactive_tunables *tunables; + struct list_head tunables_hook; +}; + +/* Separate instance required for each CPU */ +struct interactive_cpu { + struct update_util_data update_util; + struct interactive_policy *ipolicy; + + struct irq_work irq_work; + u64 last_sample_time; + bool work_in_progress; + + struct rw_semaphore enable_sem; + struct timer_list slack_timer; + + spinlock_t load_lock; /* protects the next 4 fields */ + u64 time_in_idle; + u64 time_in_idle_timestamp; + u64 cputime_speedadj; + u64 cputime_speedadj_timestamp; + + spinlock_t target_freq_lock; /*protects target freq */ + unsigned int target_freq; + + unsigned int floor_freq; + u64 pol_floor_val_time; /* policy floor_validate_time */ + u64 loc_floor_val_time; /* per-cpu floor_validate_time */ + u64 pol_hispeed_val_time; /* policy hispeed_validate_time */ + u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */ +}; + +static DEFINE_PER_CPU(struct interactive_cpu, interactive_cpu); + +/* Realtime thread handles frequency scaling */ +static struct task_struct *speedchange_task; +static cpumask_t speedchange_cpumask; +static spinlock_t speedchange_cpumask_lock; + +/* Target load. Lower values result in higher CPU speeds. */ +#define DEFAULT_TARGET_LOAD 90 +static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD}; + +#define DEFAULT_SAMPLING_RATE (20 * USEC_PER_MSEC) +#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_SAMPLING_RATE +static unsigned int default_above_hispeed_delay[] = { + DEFAULT_ABOVE_HISPEED_DELAY +}; + +/* Iterate over interactive policies for tunables */ +#define for_each_ipolicy(__ip) \ + list_for_each_entry(__ip, &tunables->attr_set.policy_list, tunables_hook) + +static struct interactive_tunables *global_tunables; +static DEFINE_MUTEX(global_tunables_lock); + +static inline void update_slack_delay(struct interactive_tunables *tunables) +{ + tunables->timer_slack_delay = usecs_to_jiffies(tunables->timer_slack + + tunables->sampling_rate); +} + +static bool timer_slack_required(struct interactive_cpu *icpu) +{ + struct interactive_policy *ipolicy = icpu->ipolicy; + struct interactive_tunables *tunables = ipolicy->tunables; + + if (tunables->timer_slack < 0) + return false; + + if (icpu->target_freq > ipolicy->policy->min) + return true; + + return false; +} + +static void gov_slack_timer_start(struct interactive_cpu *icpu, int cpu) +{ + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + + icpu->slack_timer.expires = jiffies + tunables->timer_slack_delay; + add_timer_on(&icpu->slack_timer, cpu); +} + +static void gov_slack_timer_modify(struct interactive_cpu *icpu) +{ + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + + mod_timer(&icpu->slack_timer, jiffies + tunables->timer_slack_delay); +} + +static void slack_timer_resched(struct interactive_cpu *icpu, int cpu, + bool modify) +{ + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + unsigned long flags; + + spin_lock_irqsave(&icpu->load_lock, flags); + + icpu->time_in_idle = get_cpu_idle_time(cpu, + &icpu->time_in_idle_timestamp, + tunables->io_is_busy); + icpu->cputime_speedadj = 0; + icpu->cputime_speedadj_timestamp = icpu->time_in_idle_timestamp; + + if (timer_slack_required(icpu)) { + if (modify) + gov_slack_timer_modify(icpu); + else + gov_slack_timer_start(icpu, cpu); + } + + spin_unlock_irqrestore(&icpu->load_lock, flags); +} + +static unsigned int +freq_to_above_hispeed_delay(struct interactive_tunables *tunables, + unsigned int freq) +{ + unsigned long flags; + unsigned int ret; + int i; + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + + for (i = 0; i < tunables->nabove_hispeed_delay - 1 && + freq >= tunables->above_hispeed_delay[i + 1]; i += 2) + ; + + ret = tunables->above_hispeed_delay[i]; + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + + return ret; +} + +static unsigned int freq_to_targetload(struct interactive_tunables *tunables, + unsigned int freq) +{ + unsigned long flags; + unsigned int ret; + int i; + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + + for (i = 0; i < tunables->ntarget_loads - 1 && + freq >= tunables->target_loads[i + 1]; i += 2) + ; + + ret = tunables->target_loads[i]; + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + return ret; +} + +/* + * If increasing frequencies never map to a lower target load then + * choose_freq() will find the minimum frequency that does not exceed its + * target load given the current load. + */ +static unsigned int choose_freq(struct interactive_cpu *icpu, + unsigned int loadadjfreq) +{ + struct cpufreq_policy *policy = icpu->ipolicy->policy; + struct cpufreq_frequency_table *freq_table = policy->freq_table; + unsigned int prevfreq, freqmin = 0, freqmax = UINT_MAX, tl; + unsigned int freq = policy->cur; + int index; + + do { + prevfreq = freq; + tl = freq_to_targetload(icpu->ipolicy->tunables, freq); + + /* + * Find the lowest frequency where the computed load is less + * than or equal to the target load. + */ + + index = cpufreq_frequency_table_target(policy, loadadjfreq / tl, + CPUFREQ_RELATION_L); + + freq = freq_table[index].frequency; + + if (freq > prevfreq) { + /* The previous frequency is too low */ + freqmin = prevfreq; + + if (freq < freqmax) + continue; + + /* Find highest frequency that is less than freqmax */ + index = cpufreq_frequency_table_target(policy, + freqmax - 1, CPUFREQ_RELATION_H); + + freq = freq_table[index].frequency; + + if (freq == freqmin) { + /* + * The first frequency below freqmax has already + * been found to be too low. freqmax is the + * lowest speed we found that is fast enough. + */ + freq = freqmax; + break; + } + } else if (freq < prevfreq) { + /* The previous frequency is high enough. */ + freqmax = prevfreq; + + if (freq > freqmin) + continue; + + /* Find lowest frequency that is higher than freqmin */ + index = cpufreq_frequency_table_target(policy, + freqmin + 1, CPUFREQ_RELATION_L); + + freq = freq_table[index].frequency; + + /* + * If freqmax is the first frequency above + * freqmin then we have already found that + * this speed is fast enough. + */ + if (freq == freqmax) + break; + } + + /* If same frequency chosen as previous then done. */ + } while (freq != prevfreq); + + return freq; +} + +static u64 update_load(struct interactive_cpu *icpu, int cpu) +{ + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + unsigned int delta_idle, delta_time; + u64 now_idle, now, active_time; + + now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy); + delta_idle = (unsigned int)(now_idle - icpu->time_in_idle); + delta_time = (unsigned int)(now - icpu->time_in_idle_timestamp); + + if (delta_time <= delta_idle) + active_time = 0; + else + active_time = delta_time - delta_idle; + + icpu->cputime_speedadj += active_time * icpu->ipolicy->policy->cur; + + icpu->time_in_idle = now_idle; + icpu->time_in_idle_timestamp = now; + + return now; +} + +/* Re-evaluate load to see if a frequency change is required or not */ +static void eval_target_freq(struct interactive_cpu *icpu) +{ + struct interactive_tunables *tunables = icpu->ipolicy->tunables; + struct cpufreq_policy *policy = icpu->ipolicy->policy; + struct cpufreq_frequency_table *freq_table = policy->freq_table; + u64 cputime_speedadj, now, max_fvtime; + unsigned int new_freq, loadadjfreq, index, delta_time; + unsigned long flags; + int cpu_load; + int cpu = smp_processor_id(); + + spin_lock_irqsave(&icpu->load_lock, flags); + now = update_load(icpu, smp_processor_id()); + delta_time = (unsigned int)(now - icpu->cputime_speedadj_timestamp); + cputime_speedadj = icpu->cputime_speedadj; + spin_unlock_irqrestore(&icpu->load_lock, flags); + + if (WARN_ON_ONCE(!delta_time)) + return; + + spin_lock_irqsave(&icpu->target_freq_lock, flags); + do_div(cputime_speedadj, delta_time); + loadadjfreq = (unsigned int)cputime_speedadj * 100; + cpu_load = loadadjfreq / policy->cur; + tunables->boosted = tunables->boost || + now < tunables->boostpulse_endtime; + + if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) { + if (policy->cur < tunables->hispeed_freq) { + new_freq = tunables->hispeed_freq; + } else { + new_freq = choose_freq(icpu, loadadjfreq); + + if (new_freq < tunables->hispeed_freq) + new_freq = tunables->hispeed_freq; + } + } else { + new_freq = choose_freq(icpu, loadadjfreq); + if (new_freq > tunables->hispeed_freq && + policy->cur < tunables->hispeed_freq) + new_freq = tunables->hispeed_freq; + } + + if (policy->cur >= tunables->hispeed_freq && + new_freq > policy->cur && + now - icpu->pol_hispeed_val_time < freq_to_above_hispeed_delay(tunables, policy->cur)) { + trace_cpufreq_interactive_notyet(cpu, cpu_load, + icpu->target_freq, policy->cur, new_freq); + goto exit; + } + + icpu->loc_hispeed_val_time = now; + + index = cpufreq_frequency_table_target(policy, new_freq, + CPUFREQ_RELATION_L); + new_freq = freq_table[index].frequency; + + /* + * Do not scale below floor_freq unless we have been at or above the + * floor frequency for the minimum sample time since last validated. + */ + max_fvtime = max(icpu->pol_floor_val_time, icpu->loc_floor_val_time); + if (new_freq < icpu->floor_freq && icpu->target_freq >= policy->cur) { + if (now - max_fvtime < tunables->min_sample_time) { + trace_cpufreq_interactive_notyet(cpu, cpu_load, + icpu->target_freq, policy->cur, new_freq); + goto exit; + } + } + + /* + * Update the timestamp for checking whether speed has been held at + * or above the selected frequency for a minimum of min_sample_time, + * if not boosted to hispeed_freq. If boosted to hispeed_freq then we + * allow the speed to drop as soon as the boostpulse duration expires + * (or the indefinite boost is turned off). + */ + + if (!tunables->boosted || new_freq > tunables->hispeed_freq) { + icpu->floor_freq = new_freq; + if (icpu->target_freq >= policy->cur || new_freq >= policy->cur) + icpu->loc_floor_val_time = now; + } + + if (icpu->target_freq == new_freq && + icpu->target_freq <= policy->cur) { + trace_cpufreq_interactive_already(cpu, cpu_load, + icpu->target_freq, policy->cur, new_freq); + goto exit; + } + + trace_cpufreq_interactive_target(cpu, cpu_load, icpu->target_freq, + policy->cur, new_freq); + + icpu->target_freq = new_freq; + spin_unlock_irqrestore(&icpu->target_freq_lock, flags); + + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + cpumask_set_cpu(cpu, &speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + + wake_up_process(speedchange_task); + return; + +exit: + spin_unlock_irqrestore(&icpu->target_freq_lock, flags); +} + +static void cpufreq_interactive_update(struct interactive_cpu *icpu) +{ + eval_target_freq(icpu); + slack_timer_resched(icpu, smp_processor_id(), true); +} + +static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy, + unsigned int *pmax_freq, + u64 *phvt, u64 *pfvt) +{ + struct interactive_cpu *icpu; + u64 hvt = ~0ULL, fvt = 0; + unsigned int max_freq = 0, i; + + for_each_cpu(i, policy->cpus) { + icpu = &per_cpu(interactive_cpu, i); + + fvt = max(fvt, icpu->loc_floor_val_time); + if (icpu->target_freq > max_freq) { + max_freq = icpu->target_freq; + hvt = icpu->loc_hispeed_val_time; + } else if (icpu->target_freq == max_freq) { + hvt = min(hvt, icpu->loc_hispeed_val_time); + } + } + + *pmax_freq = max_freq; + *phvt = hvt; + *pfvt = fvt; +} + +static void cpufreq_interactive_adjust_cpu(unsigned int cpu, + struct cpufreq_policy *policy) +{ + struct interactive_cpu *icpu; + u64 hvt, fvt; + unsigned int max_freq; + int i; + + cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt); + + for_each_cpu(i, policy->cpus) { + icpu = &per_cpu(interactive_cpu, i); + icpu->pol_floor_val_time = fvt; + } + + if (max_freq != policy->cur) { + __cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H); + for_each_cpu(i, policy->cpus) { + icpu = &per_cpu(interactive_cpu, i); + icpu->pol_hispeed_val_time = hvt; + } + } + + trace_cpufreq_interactive_setspeed(cpu, max_freq, policy->cur); +} + +static int cpufreq_interactive_speedchange_task(void *data) +{ + unsigned int cpu; + cpumask_t tmp_mask; + unsigned long flags; + +again: + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + + if (cpumask_empty(&speedchange_cpumask)) { + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + schedule(); + + if (kthread_should_stop()) + return 0; + + spin_lock_irqsave(&speedchange_cpumask_lock, flags); + } + + set_current_state(TASK_RUNNING); + tmp_mask = speedchange_cpumask; + cpumask_clear(&speedchange_cpumask); + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags); + + for_each_cpu(cpu, &tmp_mask) { + struct interactive_cpu *icpu = &per_cpu(interactive_cpu, cpu); + struct cpufreq_policy *policy = icpu->ipolicy->policy; + + if (unlikely(!down_read_trylock(&icpu->enable_sem))) + continue; + + if (likely(icpu->ipolicy)) + cpufreq_interactive_adjust_cpu(cpu, policy); + + up_read(&icpu->enable_sem); + } + + goto again; +} + +static void cpufreq_interactive_boost(struct interactive_tunables *tunables) +{ + struct interactive_policy *ipolicy; + struct cpufreq_policy *policy; + struct interactive_cpu *icpu; + unsigned long flags[2]; + bool wakeup = false; + int i; + + tunables->boosted = true; + + spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); + + for_each_ipolicy(ipolicy) { + policy = ipolicy->policy; + + for_each_cpu(i, policy->cpus) { + icpu = &per_cpu(interactive_cpu, i); + + if (!down_read_trylock(&icpu->enable_sem)) + continue; + + if (!icpu->ipolicy) { + up_read(&icpu->enable_sem); + continue; + } + + spin_lock_irqsave(&icpu->target_freq_lock, flags[1]); + if (icpu->target_freq < tunables->hispeed_freq) { + icpu->target_freq = tunables->hispeed_freq; + cpumask_set_cpu(i, &speedchange_cpumask); + icpu->pol_hispeed_val_time = ktime_to_us(ktime_get()); + wakeup = true; + } + spin_unlock_irqrestore(&icpu->target_freq_lock, flags[1]); + + up_read(&icpu->enable_sem); + } + } + + spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]); + + if (wakeup) + wake_up_process(speedchange_task); +} + +static int cpufreq_interactive_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct interactive_cpu *icpu = &per_cpu(interactive_cpu, freq->cpu); + unsigned long flags; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + if (!down_read_trylock(&icpu->enable_sem)) + return 0; + + if (!icpu->ipolicy) { + up_read(&icpu->enable_sem); + return 0; + } + + spin_lock_irqsave(&icpu->load_lock, flags); + update_load(icpu, freq->cpu); + spin_unlock_irqrestore(&icpu->load_lock, flags); + + up_read(&icpu->enable_sem); + + return 0; +} + +static struct notifier_block cpufreq_notifier_block = { + .notifier_call = cpufreq_interactive_notifier, +}; + +static unsigned int *get_tokenized_data(const char *buf, int *num_tokens) +{ + const char *cp = buf; + int ntokens = 1, i = 0; + unsigned int *tokenized_data; + int err = -EINVAL; + + while ((cp = strpbrk(cp + 1, " :"))) + ntokens++; + + if (!(ntokens & 0x1)) + goto err; + + tokenized_data = kcalloc(ntokens, sizeof(*tokenized_data), GFP_KERNEL); + if (!tokenized_data) { + err = -ENOMEM; + goto err; + } + + cp = buf; + while (i < ntokens) { + if (kstrtouint(cp, 0, &tokenized_data[i++]) < 0) + goto err_kfree; + + cp = strpbrk(cp, " :"); + if (!cp) + break; + cp++; + } + + if (i != ntokens) + goto err_kfree; + + *num_tokens = ntokens; + return tokenized_data; + +err_kfree: + kfree(tokenized_data); +err: + return ERR_PTR(err); +} + +/* Interactive governor sysfs interface */ +static struct interactive_tunables *to_tunables(struct gov_attr_set *attr_set) +{ + return container_of(attr_set, struct interactive_tunables, attr_set); +} + +#define show_one(file_name, type) \ +static ssize_t show_##file_name(struct gov_attr_set *attr_set, char *buf) \ +{ \ + struct interactive_tunables *tunables = to_tunables(attr_set); \ + return sprintf(buf, type "\n", tunables->file_name); \ +} + +static ssize_t show_target_loads(struct gov_attr_set *attr_set, char *buf) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long flags; + ssize_t ret = 0; + int i; + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + + for (i = 0; i < tunables->ntarget_loads; i++) + ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i], + i & 0x1 ? ":" : " "); + + sprintf(buf + ret - 1, "\n"); + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + + return ret; +} + +static ssize_t store_target_loads(struct gov_attr_set *attr_set, + const char *buf, size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned int *new_target_loads; + unsigned long flags; + int ntokens; + + new_target_loads = get_tokenized_data(buf, &ntokens); + if (IS_ERR(new_target_loads)) + return PTR_ERR(new_target_loads); + + spin_lock_irqsave(&tunables->target_loads_lock, flags); + if (tunables->target_loads != default_target_loads) + kfree(tunables->target_loads); + tunables->target_loads = new_target_loads; + tunables->ntarget_loads = ntokens; + spin_unlock_irqrestore(&tunables->target_loads_lock, flags); + + return count; +} + +static ssize_t show_above_hispeed_delay(struct gov_attr_set *attr_set, + char *buf) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long flags; + ssize_t ret = 0; + int i; + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + + for (i = 0; i < tunables->nabove_hispeed_delay; i++) + ret += sprintf(buf + ret, "%u%s", + tunables->above_hispeed_delay[i], + i & 0x1 ? ":" : " "); + + sprintf(buf + ret - 1, "\n"); + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + + return ret; +} + +static ssize_t store_above_hispeed_delay(struct gov_attr_set *attr_set, + const char *buf, size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned int *new_above_hispeed_delay = NULL; + unsigned long flags; + int ntokens; + + new_above_hispeed_delay = get_tokenized_data(buf, &ntokens); + if (IS_ERR(new_above_hispeed_delay)) + return PTR_ERR(new_above_hispeed_delay); + + spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags); + if (tunables->above_hispeed_delay != default_above_hispeed_delay) + kfree(tunables->above_hispeed_delay); + tunables->above_hispeed_delay = new_above_hispeed_delay; + tunables->nabove_hispeed_delay = ntokens; + spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags); + + return count; +} + +static ssize_t store_hispeed_freq(struct gov_attr_set *attr_set, + const char *buf, size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long int val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->hispeed_freq = val; + + return count; +} + +static ssize_t store_go_hispeed_load(struct gov_attr_set *attr_set, + const char *buf, size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->go_hispeed_load = val; + + return count; +} + +static ssize_t store_min_sample_time(struct gov_attr_set *attr_set, + const char *buf, size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->min_sample_time = val; + + return count; +} + +static ssize_t show_timer_rate(struct gov_attr_set *attr_set, char *buf) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + + return sprintf(buf, "%lu\n", tunables->sampling_rate); +} + +static ssize_t store_timer_rate(struct gov_attr_set *attr_set, const char *buf, + size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long val, val_round; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + val_round = jiffies_to_usecs(usecs_to_jiffies(val)); + if (val != val_round) + pr_warn("timer_rate not aligned to jiffy. Rounded up to %lu\n", + val_round); + + tunables->sampling_rate = val_round; + + return count; +} + +static ssize_t store_timer_slack(struct gov_attr_set *attr_set, const char *buf, + size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret < 0) + return ret; + + tunables->timer_slack = val; + update_slack_delay(tunables); + + return count; +} + +static ssize_t store_boost(struct gov_attr_set *attr_set, const char *buf, + size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boost = val; + + if (tunables->boost) { + trace_cpufreq_interactive_boost("on"); + if (!tunables->boosted) + cpufreq_interactive_boost(tunables); + } else { + tunables->boostpulse_endtime = ktime_to_us(ktime_get()); + trace_cpufreq_interactive_unboost("off"); + } + + return count; +} + +static ssize_t store_boostpulse(struct gov_attr_set *attr_set, const char *buf, + size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boostpulse_endtime = ktime_to_us(ktime_get()) + + tunables->boostpulse_duration; + trace_cpufreq_interactive_boost("pulse"); + if (!tunables->boosted) + cpufreq_interactive_boost(tunables); + + return count; +} + +static ssize_t store_boostpulse_duration(struct gov_attr_set *attr_set, + const char *buf, size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->boostpulse_duration = val; + + return count; +} + +static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf, + size_t count) +{ + struct interactive_tunables *tunables = to_tunables(attr_set); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret < 0) + return ret; + + tunables->io_is_busy = val; + + return count; +} + +show_one(hispeed_freq, "%u"); +show_one(go_hispeed_load, "%lu"); +show_one(min_sample_time, "%lu"); +show_one(timer_slack, "%lu"); +show_one(boost, "%u"); +show_one(boostpulse_duration, "%u"); +show_one(io_is_busy, "%u"); + +gov_attr_rw(target_loads); +gov_attr_rw(above_hispeed_delay); +gov_attr_rw(hispeed_freq); +gov_attr_rw(go_hispeed_load); +gov_attr_rw(min_sample_time); +gov_attr_rw(timer_rate); +gov_attr_rw(timer_slack); +gov_attr_rw(boost); +gov_attr_wo(boostpulse); +gov_attr_rw(boostpulse_duration); +gov_attr_rw(io_is_busy); + +static struct attribute *interactive_attributes[] = { + &target_loads.attr, + &above_hispeed_delay.attr, + &hispeed_freq.attr, + &go_hispeed_load.attr, + &min_sample_time.attr, + &timer_rate.attr, + &timer_slack.attr, + &boost.attr, + &boostpulse.attr, + &boostpulse_duration.attr, + &io_is_busy.attr, + NULL +}; + +static struct kobj_type interactive_tunables_ktype = { + .default_attrs = interactive_attributes, + .sysfs_ops = &governor_sysfs_ops, +}; + +/* Interactive Governor callbacks */ +struct interactive_governor { + struct cpufreq_governor gov; + unsigned int usage_count; +}; + +static struct interactive_governor interactive_gov; + +#define CPU_FREQ_GOV_INTERACTIVE (&interactive_gov.gov) + +static void irq_work(struct irq_work *irq_work) +{ + struct interactive_cpu *icpu = container_of(irq_work, struct + interactive_cpu, irq_work); + + cpufreq_interactive_update(icpu); + icpu->work_in_progress = false; +} + +static void update_util_handler(struct update_util_data *data, u64 time, + unsigned int flags) +{ + struct interactive_cpu *icpu = container_of(data, + struct interactive_cpu, update_util); + struct interactive_policy *ipolicy = icpu->ipolicy; + struct interactive_tunables *tunables = ipolicy->tunables; + u64 delta_ns; + + /* + * The irq-work may not be allowed to be queued up right now. + * Possible reasons: + * - Work has already been queued up or is in progress. + * - It is too early (too little time from the previous sample). + */ + if (icpu->work_in_progress) + return; + + delta_ns = time - icpu->last_sample_time; + if ((s64)delta_ns < tunables->sampling_rate * NSEC_PER_USEC) + return; + + icpu->last_sample_time = time; + + icpu->work_in_progress = true; + irq_work_queue(&icpu->irq_work); +} + +static void gov_set_update_util(struct interactive_policy *ipolicy) +{ + struct cpufreq_policy *policy = ipolicy->policy; + struct interactive_cpu *icpu; + int cpu; + + for_each_cpu(cpu, policy->cpus) { + icpu = &per_cpu(interactive_cpu, cpu); + + icpu->last_sample_time = 0; + cpufreq_add_update_util_hook(cpu, &icpu->update_util, + update_util_handler); + } +} + +static inline void gov_clear_update_util(struct cpufreq_policy *policy) +{ + int i; + + for_each_cpu(i, policy->cpus) + cpufreq_remove_update_util_hook(i); + + synchronize_sched(); +} + +static void icpu_cancel_work(struct interactive_cpu *icpu) +{ + irq_work_sync(&icpu->irq_work); + icpu->work_in_progress = false; + del_timer_sync(&icpu->slack_timer); +} + +static struct interactive_policy * +interactive_policy_alloc(struct cpufreq_policy *policy) +{ + struct interactive_policy *ipolicy; + + ipolicy = kzalloc(sizeof(*ipolicy), GFP_KERNEL); + if (!ipolicy) + return NULL; + + ipolicy->policy = policy; + + return ipolicy; +} + +static void interactive_policy_free(struct interactive_policy *ipolicy) +{ + kfree(ipolicy); +} + +static struct interactive_tunables * +interactive_tunables_alloc(struct interactive_policy *ipolicy) +{ + struct interactive_tunables *tunables; + + tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); + if (!tunables) + return NULL; + + gov_attr_set_init(&tunables->attr_set, &ipolicy->tunables_hook); + if (!have_governor_per_policy()) + global_tunables = tunables; + + ipolicy->tunables = tunables; + + return tunables; +} + +static void interactive_tunables_free(struct interactive_tunables *tunables) +{ + if (!have_governor_per_policy()) + global_tunables = NULL; + + kfree(tunables); +} + +int cpufreq_interactive_init(struct cpufreq_policy *policy) +{ + struct interactive_policy *ipolicy; + struct interactive_tunables *tunables; + int ret; + + /* State should be equivalent to EXIT */ + if (policy->governor_data) + return -EBUSY; + + ipolicy = interactive_policy_alloc(policy); + if (!ipolicy) + return -ENOMEM; + + mutex_lock(&global_tunables_lock); + + if (global_tunables) { + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; + goto free_int_policy; + } + + policy->governor_data = ipolicy; + ipolicy->tunables = global_tunables; + + gov_attr_set_get(&global_tunables->attr_set, + &ipolicy->tunables_hook); + goto out; + } + + tunables = interactive_tunables_alloc(ipolicy); + if (!tunables) { + ret = -ENOMEM; + goto free_int_policy; + } + + tunables->hispeed_freq = policy->max; + tunables->above_hispeed_delay = default_above_hispeed_delay; + tunables->nabove_hispeed_delay = + ARRAY_SIZE(default_above_hispeed_delay); + tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD; + tunables->target_loads = default_target_loads; + tunables->ntarget_loads = ARRAY_SIZE(default_target_loads); + tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME; + tunables->boostpulse_duration = DEFAULT_MIN_SAMPLE_TIME; + tunables->sampling_rate = DEFAULT_SAMPLING_RATE; + tunables->timer_slack = DEFAULT_TIMER_SLACK; + update_slack_delay(tunables); + + spin_lock_init(&tunables->target_loads_lock); + spin_lock_init(&tunables->above_hispeed_delay_lock); + + policy->governor_data = ipolicy; + + ret = kobject_init_and_add(&tunables->attr_set.kobj, + &interactive_tunables_ktype, + get_governor_parent_kobj(policy), "%s", + interactive_gov.gov.name); + if (ret) + goto fail; + + /* One time initialization for governor */ + if (!interactive_gov.usage_count++) { + cpufreq_register_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + out: + mutex_unlock(&global_tunables_lock); + return 0; + + fail: + policy->governor_data = NULL; + interactive_tunables_free(tunables); + + free_int_policy: + mutex_unlock(&global_tunables_lock); + + interactive_policy_free(ipolicy); + pr_err("governor initialization failed (%d)\n", ret); + + return ret; +} + +void cpufreq_interactive_exit(struct cpufreq_policy *policy) +{ + struct interactive_policy *ipolicy = policy->governor_data; + struct interactive_tunables *tunables = ipolicy->tunables; + unsigned int count; + + mutex_lock(&global_tunables_lock); + + /* Last policy using the governor ? */ + if (!--interactive_gov.usage_count) { + cpufreq_unregister_notifier(&cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + count = gov_attr_set_put(&tunables->attr_set, &ipolicy->tunables_hook); + policy->governor_data = NULL; + if (!count) + interactive_tunables_free(tunables); + + mutex_unlock(&global_tunables_lock); + + interactive_policy_free(ipolicy); +} + +int cpufreq_interactive_start(struct cpufreq_policy *policy) +{ + struct interactive_policy *ipolicy = policy->governor_data; + struct interactive_cpu *icpu; + unsigned int cpu; + + for_each_cpu(cpu, policy->cpus) { + icpu = &per_cpu(interactive_cpu, cpu); + + icpu->target_freq = policy->cur; + icpu->floor_freq = icpu->target_freq; + icpu->pol_floor_val_time = ktime_to_us(ktime_get()); + icpu->loc_floor_val_time = icpu->pol_floor_val_time; + icpu->pol_hispeed_val_time = icpu->pol_floor_val_time; + icpu->loc_hispeed_val_time = icpu->pol_floor_val_time; + + down_write(&icpu->enable_sem); + icpu->ipolicy = ipolicy; + up_write(&icpu->enable_sem); + + slack_timer_resched(icpu, cpu, false); + } + + gov_set_update_util(ipolicy); + return 0; +} + +void cpufreq_interactive_stop(struct cpufreq_policy *policy) +{ + struct interactive_policy *ipolicy = policy->governor_data; + struct interactive_cpu *icpu; + unsigned int cpu; + + gov_clear_update_util(ipolicy->policy); + + for_each_cpu(cpu, policy->cpus) { + icpu = &per_cpu(interactive_cpu, cpu); + + icpu_cancel_work(icpu); + + down_write(&icpu->enable_sem); + icpu->ipolicy = NULL; + up_write(&icpu->enable_sem); + } +} + +void cpufreq_interactive_limits(struct cpufreq_policy *policy) +{ + struct interactive_cpu *icpu; + unsigned int cpu; + unsigned long flags; + + cpufreq_policy_apply_limits(policy); + + for_each_cpu(cpu, policy->cpus) { + icpu = &per_cpu(interactive_cpu, cpu); + + spin_lock_irqsave(&icpu->target_freq_lock, flags); + + if (policy->max < icpu->target_freq) + icpu->target_freq = policy->max; + else if (policy->min > icpu->target_freq) + icpu->target_freq = policy->min; + + spin_unlock_irqrestore(&icpu->target_freq_lock, flags); + } +} + +static struct interactive_governor interactive_gov = { + .gov = { + .name = "interactive", + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, + .init = cpufreq_interactive_init, + .exit = cpufreq_interactive_exit, + .start = cpufreq_interactive_start, + .stop = cpufreq_interactive_stop, + .limits = cpufreq_interactive_limits, + } +}; + +static void cpufreq_interactive_nop_timer(unsigned long data) +{ + /* + * The purpose of slack-timer is to wake up the CPU from IDLE, in order + * to decrease its frequency if it is not set to minimum already. + * + * This is important for platforms where CPU with higher frequencies + * consume higher power even at IDLE. + */ +} + +static int __init cpufreq_interactive_gov_init(void) +{ + struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; + struct interactive_cpu *icpu; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + icpu = &per_cpu(interactive_cpu, cpu); + + init_irq_work(&icpu->irq_work, irq_work); + spin_lock_init(&icpu->load_lock); + spin_lock_init(&icpu->target_freq_lock); + init_rwsem(&icpu->enable_sem); + + /* Initialize per-cpu slack-timer */ + init_timer_pinned(&icpu->slack_timer); + icpu->slack_timer.function = cpufreq_interactive_nop_timer; + } + + spin_lock_init(&speedchange_cpumask_lock); + speedchange_task = kthread_create(cpufreq_interactive_speedchange_task, + NULL, "cfinteractive"); + if (IS_ERR(speedchange_task)) + return PTR_ERR(speedchange_task); + + sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, ¶m); + get_task_struct(speedchange_task); + + /* wake up so the thread does not look hung to the freezer */ + wake_up_process(speedchange_task); + + return cpufreq_register_governor(CPU_FREQ_GOV_INTERACTIVE); +} + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return CPU_FREQ_GOV_INTERACTIVE; +} + +fs_initcall(cpufreq_interactive_gov_init); +#else +module_init(cpufreq_interactive_gov_init); +#endif + +static void __exit cpufreq_interactive_gov_exit(void) +{ + cpufreq_unregister_governor(CPU_FREQ_GOV_INTERACTIVE); + kthread_stop(speedchange_task); + put_task_struct(speedchange_task); +} +module_exit(cpufreq_interactive_gov_exit); + +MODULE_AUTHOR("Mike Chan "); +MODULE_DESCRIPTION("'cpufreq_interactive' - A dynamic cpufreq governor for Latency sensitive workloads"); +MODULE_LICENSE("GPL"); diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h new file mode 100644 index 000000000000..faecc0bfdeff --- /dev/null +++ b/include/trace/events/cpufreq_interactive.h @@ -0,0 +1,112 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cpufreq_interactive + +#if !defined(_TRACE_CPUFREQ_INTERACTIVE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CPUFREQ_INTERACTIVE_H + +#include + +DECLARE_EVENT_CLASS(set, + TP_PROTO(u32 cpu_id, unsigned long targfreq, + unsigned long actualfreq), + TP_ARGS(cpu_id, targfreq, actualfreq), + + TP_STRUCT__entry( + __field(u32, cpu_id) + __field(unsigned long, targfreq) + __field(unsigned long, actualfreq) + ), + + TP_fast_assign( + __entry->cpu_id = (u32)cpu_id; + __entry->targfreq = targfreq; + __entry->actualfreq = actualfreq; + ), + + TP_printk("cpu=%u targ=%lu actual=%lu", + __entry->cpu_id, __entry->targfreq, + __entry->actualfreq) +); + +DEFINE_EVENT(set, cpufreq_interactive_setspeed, + TP_PROTO(u32 cpu_id, unsigned long targfreq, + unsigned long actualfreq), + TP_ARGS(cpu_id, targfreq, actualfreq) +); + +DECLARE_EVENT_CLASS(loadeval, + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg), + + TP_STRUCT__entry( + __field(unsigned long, cpu_id) + __field(unsigned long, load) + __field(unsigned long, curtarg) + __field(unsigned long, curactual) + __field(unsigned long, newtarg) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->load = load; + __entry->curtarg = curtarg; + __entry->curactual = curactual; + __entry->newtarg = newtarg; + ), + + TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu", + __entry->cpu_id, __entry->load, __entry->curtarg, + __entry->curactual, __entry->newtarg) +); + +DEFINE_EVENT(loadeval, cpufreq_interactive_target, + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) +); + +DEFINE_EVENT(loadeval, cpufreq_interactive_already, + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) +); + +DEFINE_EVENT(loadeval, cpufreq_interactive_notyet, + TP_PROTO(unsigned long cpu_id, unsigned long load, + unsigned long curtarg, unsigned long curactual, + unsigned long newtarg), + TP_ARGS(cpu_id, load, curtarg, curactual, newtarg) +); + +TRACE_EVENT(cpufreq_interactive_boost, + TP_PROTO(const char *s), + TP_ARGS(s), + TP_STRUCT__entry( + __string(s, s) + ), + TP_fast_assign( + __assign_str(s, s); + ), + TP_printk("%s", __get_str(s)) +); + +TRACE_EVENT(cpufreq_interactive_unboost, + TP_PROTO(const char *s), + TP_ARGS(s), + TP_STRUCT__entry( + __string(s, s) + ), + TP_fast_assign( + __assign_str(s, s); + ), + TP_printk("%s", __get_str(s)) +); + +#endif /* _TRACE_CPUFREQ_INTERACTIVE_H */ + +/* This part must be outside protection */ +#include -- GitLab From 9dcfc2a39b200d4d89d15542f48b6f81d762b2d9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 19 May 2016 14:30:54 +0530 Subject: [PATCH 1236/1442] ANDROID: cpufreq: interactive: Use idle-end notifiers Signed-off-by: Viresh Kumar [AmitP: Cherry-picked this patch from https://git.kernel.org/cgit/linux/kernel/git/vireshk/pm.git/log/?h=cpufreq/interactive-idle-notifier] Signed-off-by: Amit Pundir --- drivers/cpufreq/cpufreq_interactive.c | 39 +++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 2fa2b50630d9..d6cac0e8e7a6 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -113,6 +113,7 @@ struct interactive_cpu { struct irq_work irq_work; u64 last_sample_time; + unsigned long next_sample_jiffies; bool work_in_progress; struct rw_semaphore enable_sem; @@ -471,6 +472,26 @@ static void cpufreq_interactive_update(struct interactive_cpu *icpu) slack_timer_resched(icpu, smp_processor_id(), true); } +static void cpufreq_interactive_idle_end(void) +{ + struct interactive_cpu *icpu = &per_cpu(interactive_cpu, + smp_processor_id()); + + if (!down_read_trylock(&icpu->enable_sem)) + return; + + if (icpu->ipolicy) { + /* + * We haven't sampled load for more than sampling_rate time, do + * it right now. + */ + if (time_after_eq(jiffies, icpu->next_sample_jiffies)) + cpufreq_interactive_update(icpu); + } + + up_read(&icpu->enable_sem); +} + static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy, unsigned int *pmax_freq, u64 *phvt, u64 *pfvt) @@ -989,6 +1010,19 @@ static struct kobj_type interactive_tunables_ktype = { .sysfs_ops = &governor_sysfs_ops, }; +static int cpufreq_interactive_idle_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + if (val == IDLE_END) + cpufreq_interactive_idle_end(); + + return 0; +} + +static struct notifier_block cpufreq_interactive_idle_nb = { + .notifier_call = cpufreq_interactive_idle_notifier, +}; + /* Interactive Governor callbacks */ struct interactive_governor { struct cpufreq_governor gov; @@ -1031,6 +1065,8 @@ static void update_util_handler(struct update_util_data *data, u64 time, return; icpu->last_sample_time = time; + icpu->next_sample_jiffies = usecs_to_jiffies(tunables->sampling_rate) + + jiffies; icpu->work_in_progress = true; irq_work_queue(&icpu->irq_work); @@ -1046,6 +1082,7 @@ static void gov_set_update_util(struct interactive_policy *ipolicy) icpu = &per_cpu(interactive_cpu, cpu); icpu->last_sample_time = 0; + icpu->next_sample_jiffies = 0; cpufreq_add_update_util_hook(cpu, &icpu->update_util, update_util_handler); } @@ -1176,6 +1213,7 @@ int cpufreq_interactive_init(struct cpufreq_policy *policy) /* One time initialization for governor */ if (!interactive_gov.usage_count++) { + idle_notifier_register(&cpufreq_interactive_idle_nb); cpufreq_register_notifier(&cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } @@ -1209,6 +1247,7 @@ void cpufreq_interactive_exit(struct cpufreq_policy *policy) if (!--interactive_gov.usage_count) { cpufreq_unregister_notifier(&cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); + idle_notifier_unregister(&cpufreq_interactive_idle_nb); } count = gov_attr_set_put(&tunables->attr_set, &ipolicy->tunables_hook); -- GitLab From 0de4c4c1945b071f2563285ffc77226756347cc1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 30 Jan 2017 12:26:08 -0800 Subject: [PATCH 1237/1442] ANDROID: fs: Export free_fs_struct and set_fs_pwd allmodconfig builds fail with: ERROR: "free_fs_struct" undefined! ERROR: "set_fs_pwd" undefined! Export the missing symbols. Change-Id: I4877ead19d7e7f0c93d4c4cad5681364284323aa Fixes: 0ec03f845799 ("ANDROID: sdcardfs: override umask on mkdir and create") Signed-off-by: Guenter Roeck --- fs/fs_struct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 005dcb401369..940c683561dd 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -44,6 +44,7 @@ void set_fs_pwd(struct fs_struct *fs, const struct path *path) if (old_pwd.dentry) path_put(&old_pwd); } +EXPORT_SYMBOL(set_fs_pwd); static inline int replace_path(struct path *p, const struct path *old, const struct path *new) { @@ -89,6 +90,7 @@ void free_fs_struct(struct fs_struct *fs) path_put(&fs->pwd); kmem_cache_free(fs_cachep, fs); } +EXPORT_SYMBOL(free_fs_struct); void exit_fs(struct task_struct *tsk) { -- GitLab From f122216df533e208562ac4157518750808220e22 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 30 Jan 2017 12:29:00 -0800 Subject: [PATCH 1238/1442] ANDROID: fs: Export vfs_rmdir2 allmodconfig builds fail with ERROR: "vfs_rmdir2" undefined! Export the missing function. Change-Id: I983d327e59fd34e0484f3c54d925e97d3905c19c Fixes: f9cb61dcb00c ("ANDROID: sdcardfs: User new permission2 functions") Signed-off-by: Guenter Roeck --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index dc469b4648c2..8fa2ddc5ed35 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3892,6 +3892,8 @@ int vfs_rmdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry) d_delete(dentry); return error; } +EXPORT_SYMBOL(vfs_rmdir2); + int vfs_rmdir(struct inode *dir, struct dentry *dentry) { return vfs_rmdir2(NULL, dir, dentry); -- GitLab From 660cd193f86a0978fe1063924c8af9c2fafce1f8 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 25 Aug 2016 11:06:37 +0530 Subject: [PATCH 1239/1442] ANDROID: sched/walt: include missing header for arm_timer_read_counter() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include clocksource/arm_arch_timer.h to fix implicit function declaration of ‘arch_timer_read_counter’ build error for ARCH=arm. Change-Id: Ic1d18f62877c1e23413f00fd668eb4974dbe90ff Signed-off-by: Amit Pundir [jstultz: Cherry-picked from common/android-3.18] Signed-off-by: John Stultz --- kernel/sched/walt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 2bc5c1fdcd23..43733259f542 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "sched.h" #include "walt.h" -- GitLab From 5ea9de8ee9ee44d4fa73e75c7b03af2a6b62f49b Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 20 Sep 2016 17:00:47 +0100 Subject: [PATCH 1240/1442] ANDROID: sched/walt: Drop arch-specific timer access On at least one platform, occasionally the timer providing the wallclock was able to be reset/go backwards for at least some time after wakeup. Accept that this might happen and warn the first time, but otherwise just carry on. Change-Id: Id3164477ba79049561af7f0889cbeebc199ead4e Signed-off-by: Chris Redpath --- kernel/sched/walt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 43733259f542..7f686ffccbf0 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "sched.h" #include "walt.h" @@ -188,10 +187,8 @@ update_window_start(struct rq *rq, u64 wallclock) delta = wallclock - rq->window_start; /* If the MPM global timer is cleared, set delta as 0 to avoid kernel BUG happening */ if (delta < 0) { - if (arch_timer_read_counter() == 0) - delta = 0; - else - BUG_ON(1); + delta = 0; + WARN_ONCE(1, "WALT wallclock appears to have gone backwards or reset\n"); } if (delta < walt_ravg_window) -- GitLab From 544160b6ea18670196d1173c099f2cced5075132 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2017 15:18:24 -0800 Subject: [PATCH 1241/1442] fbdev: color map copying bounds checking commit 2dc705a9930b4806250fbf5a76e55266e59389f2 upstream. Copying color maps to userspace doesn't check the value of to->start, which will cause kernel heap buffer OOB read due to signedness wraps. CVE-2016-8405 Link: http://lkml.kernel.org/r/20170105224249.GA50925@beast Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kees Cook Reported-by: Peter Pi (@heisecode) of Trend Micro Cc: Min Chong Cc: Dan Carpenter Cc: Tomi Valkeinen Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbcmap.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/video/fbdev/core/fbcmap.c b/drivers/video/fbdev/core/fbcmap.c index f89245b8ba8e..68a113594808 100644 --- a/drivers/video/fbdev/core/fbcmap.c +++ b/drivers/video/fbdev/core/fbcmap.c @@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap) int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); @@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); -- GitLab From e4be4d4942b92cdd2d3e87ed178269bcd5844526 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 6 Jan 2017 17:54:51 +0000 Subject: [PATCH 1242/1442] tile/ptrace: Preserve previous registers for short regset write commit fd7c99142d77dc4a851879a66715abf12a3193fb upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Signed-off-by: Chris Metcalf Signed-off-by: Greg Kroah-Hartman --- arch/tile/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index d89b7011667c..e279572824b1 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -111,7 +111,7 @@ static int tile_gpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - struct pt_regs regs; + struct pt_regs regs = *task_pt_regs(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®s, 0, sizeof(regs)); -- GitLab From 2abb7f408f7cfb9af9218e74507f5f44af154302 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 9 Jan 2017 16:31:58 +0200 Subject: [PATCH 1243/1442] drm: Schedule the output_poll_work with 1s delay if we have delayed event commit 68f458eec7069d618a6c884ca007426e0cea411b upstream. Instead of scheduling the work to handle the initial delayed event, use 1s delay. This delay should not be needed, but Optimus/nouveau will fail in a mysterious way if the delayed event is handled as soon as possible like it is done in drm_helper_probe_single_connector_modes() in case the poll was enabled before. Reverting 339fd36238dd would give back the 10 sec (!) delay to handle the delayed event. Adding 1sec delay to the poll_work is enough to work around the issue in Optimus setups and gives shorter response on handling the initial delayed event. Fixes: 339fd36238dd ("drm: drm_probe_helper: Fix output_poll_work scheduling") Signed-off-by: Peter Ujfalusi [danvet: Add FIXME to the comment to make it stick out more.] Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170109143158.21917-1-peter.ujfalusi@ti.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_probe_helper.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index f6b64d7d3528..276474d13763 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -143,8 +143,18 @@ void drm_kms_helper_poll_enable_locked(struct drm_device *dev) } if (dev->mode_config.delayed_event) { + /* + * FIXME: + * + * Use short (1s) delay to handle the initial delayed event. + * This delay should not be needed, but Optimus/nouveau will + * fail in a mysterious way if the delayed event is handled as + * soon as possible like it is done in + * drm_helper_probe_single_connector_modes() in case the poll + * was enabled before. + */ poll = true; - delay = 0; + delay = HZ; } if (poll) -- GitLab From f1dc9aaee02950e95cc045209d9bf07955bc16d8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Jan 2017 15:56:14 +0100 Subject: [PATCH 1244/1442] drm: Fix broken VT switch with video=1366x768 option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fdf35a6b22247746a7053fc764d04218a9306f82 upstream. I noticed that the VT switch doesn't work any longer with a Dell laptop with 1366x768 eDP when the machine is connected with a DP monitor. It behaves as if VT were switched, but the graphics remain frozen. Actually the keyboard works, so I could switch back to VT7 again. I tried to track down the problem, and encountered a long story until we reach to this error: - The machine is booted with video=1366x768 option (the distro installer seems to add it as default). - Recently, drm_helper_probe_single_connector_modes() deals with cmdline modes, and it tries to create a new mode when no matching mode is found. - The drm_mode_create_from_cmdline_mode() creates a mode based on either CVT of GFT according to the given cmdline mode; in our case, it's 1366x768. - Since both CVT and GFT can't express the width 1366 due to alignment, the resultant mode becomes 1368x768, slightly larger than the given size. - Later on, the atomic commit is performed, and in drm_atomic_check_only(), the size of each plane is checked. - The size check of 1366x768 fails due to the above, and eventually the whole VT switch fails. Back in the history, we've had a manual fix-up of 1368x768 in various places via c09dedb7a50e ("drm/edid: Add a workaround for 1366x768 HD panel"), but they have been all in drm_edid.c at probing the modes from EDID. For addressing the problem above, we need a similar hack to the mode newly created from cmdline, manually adjusting the width when the expected size is 1366 while we get 1368 instead. Fixes: eaf99c749d43 ("drm: Perform cmdline mode parsing during...") Signed-off-by: Takashi Iwai Link: http://patchwork.freedesktop.org/patch/msgid/20170109145614.29454-1-tiwai@suse.de Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_modes.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 53f07ac7c174..e14366de0e6e 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1462,6 +1462,13 @@ drm_mode_create_from_cmdline_mode(struct drm_device *dev, return NULL; mode->type |= DRM_MODE_TYPE_USERDEF; + /* fix up 1368x768: GFT/CVT can't express 1366 width due to alignment */ + if (cmd->xres == 1366 && mode->hdisplay == 1368) { + mode->hdisplay = 1366; + mode->hsync_start--; + mode->hsync_end--; + drm_mode_set_name(mode); + } drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); return mode; } -- GitLab From 4c741e2adb351a44d1f4bc210aafd382939ac7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 7 Nov 2016 22:20:54 +0200 Subject: [PATCH 1245/1442] drm/i915: Ignore bogus plane coordinates on SKL when the plane is not visible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3bfdfdcbce2796ce75bf2d85fd8471858d702e5d upstream. When the plane is invisible we may have all sorts of bogus stuff in the coordinates, which we must ignore or else we might fail the plane update. This started to happen on SKL when I moved the plane offset computation to happen in the check phase. Previously we happily ignored it all since we never called the update_plane hook with an invisible plane. Cc: Sivakumar Thulasimani Cc: drm-intel-fixes@lists.freedesktop.org Fixes: b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+") Signed-off-by: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98258 Testcase: igt/pm_rpm/legacy-planes Testcase: igt/pm_rpm/universal-planes Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/1478550057-24864-3-git-send-email-ville.syrjala@linux.intel.com (cherry picked from commit a5e4c7d0aa6784d8abe95c3ceef0da9656d17468) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 869b29fe9ec4..3c9cb35e46ab 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2975,6 +2975,9 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) unsigned int rotation = plane_state->base.rotation; int ret; + if (!plane_state->base.visible) + return 0; + /* Rotate src coordinates to match rotated GTT view */ if (intel_rotation_90_or_270(rotation)) drm_rect_rotate(&plane_state->base.src, -- GitLab From 32600835ebe1bd972d01ce7f331b87bd80705d30 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 10 Oct 2016 09:44:06 -0700 Subject: [PATCH 1246/1442] drm/vc4: Fix memory leak of the CRTC state. commit 7622b25543665567d8830a63210385b7d705924b upstream. The underscores variant frees the pointers inside, while the no-underscores variant calls underscores and then frees the struct. Signed-off-by: Eric Anholt Fixes: d8dbf44f13b9 ("drm/vc4: Make the CRTCs cooperate on allocating display lists.") Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 7f08d681a74b..d544ff9b0d46 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -832,7 +832,7 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, } - __drm_atomic_helper_crtc_destroy_state(state); + drm_atomic_helper_crtc_destroy_state(crtc, state); } static const struct drm_crtc_funcs vc4_crtc_funcs = { -- GitLab From b9edac54cb85da589ca809bf8dcf86e5cd3f41c0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 17 Jan 2017 21:42:53 +1100 Subject: [PATCH 1247/1442] drm/vc4: Fix an integer overflow in temporary allocation layout. commit 0f2ff82e11c86c05d051cae32b58226392d33bbf upstream. We copy the unvalidated ioctl arguments from the user into kernel temporary memory to run the validation from, to avoid a race where the user updates the unvalidate contents in between validating them and copying them into the validated BO. However, in setting up the layout of the kernel side, we failed to check one of the additions (the roundup() for shader_rec_offset) against integer overflow, allowing a nearly MAX_UINT value of bin_cl_size to cause us to under-allocate the temporary space that we then copy_from_user into. Reported-by: Murray McAllister Signed-off-by: Eric Anholt Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 303f23c96220..39ef6743d3b8 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -594,7 +594,8 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) args->shader_rec_count); struct vc4_bo *bo; - if (uniforms_offset < shader_rec_offset || + if (shader_rec_offset < args->bin_cl_size || + uniforms_offset < shader_rec_offset || exec_size < uniforms_offset || args->shader_rec_count >= (UINT_MAX / sizeof(struct vc4_shader_state)) || -- GitLab From cfba2a001d0e36905016bb4f87fc47245c944c36 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 17 Jan 2017 21:58:06 +1100 Subject: [PATCH 1248/1442] drm/vc4: Return -EINVAL on the overflow checks failing. commit 6b8ac63847bc2f958dd93c09edc941a0118992d9 upstream. By failing to set the errno, we'd continue on to trying to set up the RCL, and then oops on trying to dereference the tile_bo that binning validation should have set up. Reported-by: Ingo Molnar Signed-off-by: Eric Anholt Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 39ef6743d3b8..18e37171e9c8 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -601,6 +601,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) sizeof(struct vc4_shader_state)) || temp_size < exec_size) { DRM_ERROR("overflow in exec arguments\n"); + ret = -EINVAL; goto fail; } -- GitLab From 5270c017f19f32812d7d83ba6e45873b8db503d0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Jan 2017 10:49:00 +0300 Subject: [PATCH 1249/1442] drm/vc4: fix a bounds check commit 21ccc32496b2f63228f5232b3ac0e426e8fb3c31 upstream. We accidentally return success even if vc4_full_res_bounds_check() fails. Fixes: d5b1a78a772f ("drm/vc4: Add support for drawing 3D frames.") Signed-off-by: Dan Carpenter Reviewed-by: Eric Engestrom Reviewed-by: Eric Anholt Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vc4/vc4_render_cl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 08886a309757..5cdd003605f5 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -461,7 +461,7 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, } ret = vc4_full_res_bounds_check(exec, *obj, surf); - if (!ret) + if (ret) return ret; return 0; -- GitLab From bbae3c4525966606db790ac7b7910b8639b5da42 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 25 Jan 2017 12:00:29 -0500 Subject: [PATCH 1250/1442] Revert "drm/radeon: always apply pci shutdown callbacks" commit b9b487e494712c8e5905b724e12f5ef17e9ae6f9 upstream. This seems to break reboot on some evergreen systems. bugs: https://bugs.freedesktop.org/show_bug.cgi?id=99524 https://bugzilla.kernel.org/show_bug.cgi?id=192271 This reverts commit a481daa88fd4d6b54f25348972bba10b5f6a84d0. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_drv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 00ea0002b539..e0c143b865f3 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -366,11 +366,10 @@ static void radeon_pci_shutdown(struct pci_dev *pdev) { /* if we are running in a VM, make sure the device - * torn down properly on reboot/shutdown. - * unfortunately we can't detect certain - * hypervisors so just do this all the time. + * torn down properly on reboot/shutdown */ - radeon_pci_remove(pdev); + if (radeon_device_is_virtual()) + radeon_pci_remove(pdev); } static int radeon_pmops_suspend(struct device *dev) -- GitLab From a2104c7cd3b24c4329b6193f7ec0882ce612f110 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 27 Jan 2017 11:33:04 +0100 Subject: [PATCH 1251/1442] drm/atomic: clear out fence when duplicating state [Fixed differently in 4.10] The fence needs to be cleared out, otherwise the following commit might wait on a stale fence from the previous commit. This was fixed as a side effect of 9626014258a5 (drm/fence: add in-fences support) in kernel 4.10. As this commit introduces new functionality and as such can not be applied to stable, this patch is the minimal fix for the kernel 4.9 stable series. Signed-off-by: Lucas Stach Reviewed-by: Daniel Vetter Tested-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic_helper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 338766c64c99..a05bb3891119 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3115,6 +3115,8 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, if (state->fb) drm_framebuffer_reference(state->fb); + + state->fence = NULL; } EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); -- GitLab From 6676aa65464f23e32efe1857a51fe341dfb10e03 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 24 Jan 2017 15:17:48 -0800 Subject: [PATCH 1252/1442] mm/huge_memory.c: respect FOLL_FORCE/FOLL_COW for thp commit 8310d48b125d19fcd9521d83b8293e63eb1646aa upstream. In commit 19be0eaffa3a ("mm: remove gup_flags FOLL_WRITE games from __get_user_pages()"), the mm code was changed from unsetting FOLL_WRITE after a COW was resolved to setting the (newly introduced) FOLL_COW instead. Simultaneously, the check in gup.c was updated to still allow writes with FOLL_FORCE set if FOLL_COW had also been set. However, a similar check in huge_memory.c was forgotten. As a result, remote memory writes to ro regions of memory backed by transparent huge pages cause an infinite loop in the kernel (handle_mm_fault sets FOLL_COW and returns 0 causing a retry, but follow_trans_huge_pmd bails out immidiately because `(flags & FOLL_WRITE) && !pmd_write(*pmd)` is true. While in this state the process is stil SIGKILLable, but little else works (e.g. no ptrace attach, no other signals). This is easily reproduced with the following code (assuming thp are set to always): #include #include #include #include #include #include #include #include #include #include #define TEST_SIZE 5 * 1024 * 1024 int main(void) { int status; pid_t child; int fd = open("/proc/self/mem", O_RDWR); void *addr = mmap(NULL, TEST_SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); assert(addr != MAP_FAILED); pid_t parent_pid = getpid(); if ((child = fork()) == 0) { void *addr2 = mmap(NULL, TEST_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); assert(addr2 != MAP_FAILED); memset(addr2, 'a', TEST_SIZE); pwrite(fd, addr2, TEST_SIZE, (uintptr_t)addr); return 0; } assert(child == waitpid(child, &status, 0)); assert(WIFEXITED(status) && WEXITSTATUS(status) == 0); return 0; } Fix this by updating follow_trans_huge_pmd in huge_memory.c analogously to the update in gup.c in the original commit. The same pattern exists in follow_devmap_pmd. However, we should not be able to reach that check with FOLL_COW set, so add WARN_ONCE to make sure we notice if we ever do. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170106015025.GA38411@juliacomputing.com Signed-off-by: Keno Fischer Acked-by: Kirill A. Shutemov Cc: Greg Thelen Cc: Nicholas Piggin Cc: Willy Tarreau Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Michal Hocko Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8ca40b70beae..917555cf6be0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -772,6 +772,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pmd_lockptr(mm, pmd)); + /* + * When we COW a devmap PMD entry, we split it into PTEs, so we should + * not be in this function with `flags & FOLL_COW` set. + */ + WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); + if (flags & FOLL_WRITE && !pmd_write(*pmd)) return NULL; @@ -1118,6 +1124,16 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd) return ret; } +/* + * FOLL_FORCE can write to even unwritable pmd's, but only + * after we've gone through a COW cycle and they are dirty. + */ +static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) +{ + return pmd_write(pmd) || + ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); +} + struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, @@ -1128,7 +1144,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, assert_spin_locked(pmd_lockptr(mm, pmd)); - if (flags & FOLL_WRITE && !pmd_write(*pmd)) + if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags)) goto out; /* Avoid dumping huge zero page */ -- GitLab From 9b1a1ae9b5281e12893a34bcbd1686b5bcd2cd82 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:18 -0800 Subject: [PATCH 1253/1442] mm/mempolicy.c: do not put mempolicy before using its nodemask commit d51e9894d27492783fc6d1b489070b4ba66ce969 upstream. Since commit be97a41b291e ("mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma") alloc_pages_vma() can potentially free a mempolicy by mpol_cond_put() before accessing the embedded nodemask by __alloc_pages_nodemask(). The commit log says it's so "we can use a single exit path within the function" but that's clearly wrong. We can still do that when doing mpol_cond_put() after the allocation attempt. Make sure the mempolicy is not freed prematurely, otherwise __alloc_pages_nodemask() can end up using a bogus nodemask, which could lead e.g. to premature OOM. Fixes: be97a41b291e ("mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma") Link: http://lkml.kernel.org/r/20170118141124.8345-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Aneesh Kumar K.V Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0b859af06b87..f75704717e47 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2023,8 +2023,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, nmask = policy_nodemask(gfp, pol); zl = policy_zonelist(gfp, pol, node); - mpol_cond_put(pol); page = __alloc_pages_nodemask(gfp, order, zl, nmask); + mpol_cond_put(pol); out: if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; -- GitLab From ade7afe9dca6b13919f88abd38eefe32f22eaeb3 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:32 -0800 Subject: [PATCH 1254/1442] mm, page_alloc: fix check for NULL preferred_zone commit ea57485af8f4221312a5a95d63c382b45e7840dc upstream. Patch series "fix premature OOM regression in 4.7+ due to cpuset races". This is v2 of my attempt to fix the recent report based on LTP cpuset stress test [1]. The intention is to go to stable 4.9 LTSS with this, as triggering repeated OOMs is not nice. That's why the patches try to be not too intrusive. Unfortunately why investigating I found that modifying the testcase to use per-VMA policies instead of per-task policies will bring the OOM's back, but that seems to be much older and harder to fix problem. I have posted a RFC [2] but I believe that fixing the recent regressions has a higher priority. Longer-term we might try to think how to fix the cpuset mess in a better and less error prone way. I was for example very surprised to learn, that cpuset updates change not only task->mems_allowed, but also nodemask of mempolicies. Until now I expected the parameter to alloc_pages_nodemask() to be stable. I wonder why do we then treat cpusets specially in get_page_from_freelist() and distinguish HARDWALL etc, when there's unconditional intersection between mempolicy and cpuset. I would expect the nodemask adjustment for saving overhead in g_p_f(), but that clearly doesn't happen in the current form. So we have both crazy complexity and overhead, AFAICS. [1] https://lkml.kernel.org/r/CAFpQJXUq-JuEP=QPidy4p_=FN0rkH5Z-kfB4qBvsf6jMS87Edg@mail.gmail.com [2] https://lkml.kernel.org/r/7c459f26-13a6-a817-e508-b65b903a8378@suse.cz This patch (of 4): Since commit c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") we have a wrong check for NULL preferred_zone, which can theoretically happen due to concurrent cpuset modification. We check the zoneref pointer which is never NULL and we should check the zone pointer. Also document this in first_zones_zonelist() comment per Michal Hocko. Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") Link: http://lkml.kernel.org/r/20170120103843.24587-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Ganapatrao Kulkarni Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 6 +++++- mm/page_alloc.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0f088f3a2fed..f99c993dd500 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, * @zonelist - The zonelist to search for a suitable zone * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with - * @zone - The first suitable zone found is returned via this parameter + * @return - Zoneref pointer for the first suitable zone found (see below) * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. + * + * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is + * never NULL). This may happen either genuinely, or due to concurrent nodemask + * update due to cpuset modification. */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 34ada718ef47..593a11d8bc6b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3763,7 +3763,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, */ ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, ac.nodemask); - if (!ac.preferred_zoneref) { + if (!ac.preferred_zoneref->zone) { page = NULL; goto no_zone; } -- GitLab From d1656c5aef4d72f03a7833d07a378c8f604b8307 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:35 -0800 Subject: [PATCH 1255/1442] mm, page_alloc: fix fast-path race with cpuset update or removal commit 16096c25bf0ca5d87e4fa6ec6108ba53feead212 upstream. Ganapatrao Kulkarni reported that the LTP test cpuset01 in stress mode triggers OOM killer in few seconds, despite lots of free memory. The test attempts to repeatedly fault in memory in one process in a cpuset, while changing allowed nodes of the cpuset between 0 and 1 in another process. One possible cause is that in the fast path we find the preferred zoneref according to current mems_allowed, so that it points to the middle of the zonelist, skipping e.g. zones of node 1 completely. If the mems_allowed is updated to contain only node 1, we never reach it in the zonelist, and trigger OOM before checking the cpuset_mems_cookie. This patch fixes the particular case by redoing the preferred zoneref search if we switch back to the original nodemask. The condition is also slightly changed so that when the last non-root cpuset is removed, we don't miss it. Note that this is not a full fix, and more patches will follow. Link: http://lkml.kernel.org/r/20170120103843.24587-3-vbabka@suse.cz Fixes: 682a3385e773 ("mm, page_alloc: inline the fast path of the zonelist iterator") Signed-off-by: Vlastimil Babka Reported-by: Ganapatrao Kulkarni Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 593a11d8bc6b..dedadb4a779f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3783,9 +3783,17 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, /* * Restore the original nodemask if it was potentially replaced with * &cpuset_current_mems_allowed to optimize the fast-path attempt. + * Also recalculate the starting point for the zonelist iterator or + * we could end up iterating over non-eligible zones endlessly. */ - if (cpusets_enabled()) + if (unlikely(ac.nodemask != nodemask)) { ac.nodemask = nodemask; + ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, + ac.high_zoneidx, ac.nodemask); + if (!ac.preferred_zoneref->zone) + goto no_zone; + } + page = __alloc_pages_slowpath(alloc_mask, order, &ac); no_zone: -- GitLab From b678e4ff7ce0d01bb14f0adb92c1786b0a341cca Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:38 -0800 Subject: [PATCH 1256/1442] mm, page_alloc: move cpuset seqcount checking to slowpath commit 5ce9bfef1d27944c119a397a9d827bef795487ce upstream. This is a preparation for the following patch to make review simpler. While the primary motivation is a bug fix, this also simplifies the fast path, although the moved code is only enabled when cpusets are in use. Link: http://lkml.kernel.org/r/20170120103843.24587-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Ganapatrao Kulkarni Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dedadb4a779f..3e04bb398dc6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3502,12 +3502,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct page *page = NULL; unsigned int alloc_flags; unsigned long did_some_progress; - enum compact_priority compact_priority = DEF_COMPACT_PRIORITY; + enum compact_priority compact_priority; enum compact_result compact_result; - int compaction_retries = 0; - int no_progress_loops = 0; + int compaction_retries; + int no_progress_loops; unsigned long alloc_start = jiffies; unsigned int stall_timeout = 10 * HZ; + unsigned int cpuset_mems_cookie; /* * In the slowpath, we sanity check order to avoid ever trying to @@ -3528,6 +3529,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) gfp_mask &= ~__GFP_ATOMIC; +retry_cpuset: + compaction_retries = 0; + no_progress_loops = 0; + compact_priority = DEF_COMPACT_PRIORITY; + cpuset_mems_cookie = read_mems_allowed_begin(); + /* * The fast path uses conservative alloc_flags to succeed only until * kswapd needs to be woken up, and to avoid the cost of setting up @@ -3699,6 +3706,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, } nopage: + /* + * When updating a task's mems_allowed, it is possible to race with + * parallel threads in such a way that an allocation can fail while + * the mask is being updated. If a page allocation is about to fail, + * check if the cpuset changed during allocation and if so, retry. + */ + if (read_mems_allowed_retry(cpuset_mems_cookie)) + goto retry_cpuset; + warn_alloc(gfp_mask, "page allocation failure: order:%u", order); got_pg: @@ -3713,7 +3729,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, nodemask_t *nodemask) { struct page *page; - unsigned int cpuset_mems_cookie; unsigned int alloc_flags = ALLOC_WMARK_LOW; gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { @@ -3750,9 +3765,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE) alloc_flags |= ALLOC_CMA; -retry_cpuset: - cpuset_mems_cookie = read_mems_allowed_begin(); - /* Dirty zone balancing only done in the fast path */ ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); @@ -3765,6 +3777,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, ac.high_zoneidx, ac.nodemask); if (!ac.preferred_zoneref->zone) { page = NULL; + /* + * This might be due to race with cpuset_current_mems_allowed + * update, so make sure we retry with original nodemask in the + * slow path. + */ goto no_zone; } @@ -3773,6 +3790,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (likely(page)) goto out; +no_zone: /* * Runtime PM, block IO and its error handling path can deadlock * because I/O on the device might not complete. @@ -3790,24 +3808,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, ac.nodemask = nodemask; ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, ac.nodemask); - if (!ac.preferred_zoneref->zone) - goto no_zone; + /* If we have NULL preferred zone, slowpath wll handle that */ } page = __alloc_pages_slowpath(alloc_mask, order, &ac); -no_zone: - /* - * When updating a task's mems_allowed, it is possible to race with - * parallel threads in such a way that an allocation can fail while - * the mask is being updated. If a page allocation is about to fail, - * check if the cpuset changed during allocation and if so, retry. - */ - if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) { - alloc_mask = gfp_mask; - goto retry_cpuset; - } - out: if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page && unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) { -- GitLab From 96e5cec10e7a75c931f8993633b3a5cedc99144e Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:41 -0800 Subject: [PATCH 1257/1442] mm, page_alloc: fix premature OOM when racing with cpuset mems update commit e47483bca2cc59a4593b37a270b16ee42b1d9f08 upstream. Ganapatrao Kulkarni reported that the LTP test cpuset01 in stress mode triggers OOM killer in few seconds, despite lots of free memory. The test attempts to repeatedly fault in memory in one process in a cpuset, while changing allowed nodes of the cpuset between 0 and 1 in another process. The problem comes from insufficient protection against cpuset changes, which can cause get_page_from_freelist() to consider all zones as non-eligible due to nodemask and/or current->mems_allowed. This was masked in the past by sufficient retries, but since commit 682a3385e773 ("mm, page_alloc: inline the fast path of the zonelist iterator") we fix the preferred_zoneref once, and don't iterate over the whole zonelist in further attempts, thus the only eligible zones might be placed in the zonelist before our starting point and we always miss them. A previous patch fixed this problem for current->mems_allowed. However, cpuset changes also update the task's mempolicy nodemask. The fix has two parts. We have to repeat the preferred_zoneref search when we detect cpuset update by way of seqcount, and we have to check the seqcount before considering OOM. [akpm@linux-foundation.org: fix typo in comment] Link: http://lkml.kernel.org/r/20170120103843.24587-5-vbabka@suse.cz Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") Signed-off-by: Vlastimil Babka Reported-by: Ganapatrao Kulkarni Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e04bb398dc6..f4a02e240fb6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3534,6 +3534,17 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, no_progress_loops = 0; compact_priority = DEF_COMPACT_PRIORITY; cpuset_mems_cookie = read_mems_allowed_begin(); + /* + * We need to recalculate the starting point for the zonelist iterator + * because we might have used different nodemask in the fast path, or + * there was a cpuset modification and we are retrying - otherwise we + * could end up iterating over non-eligible zones endlessly. + */ + ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, + ac->high_zoneidx, ac->nodemask); + if (!ac->preferred_zoneref->zone) + goto nopage; + /* * The fast path uses conservative alloc_flags to succeed only until @@ -3694,6 +3705,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, &compaction_retries)) goto retry; + /* + * It's possible we raced with cpuset update so the OOM would be + * premature (see below the nopage: label for full explanation). + */ + if (read_mems_allowed_retry(cpuset_mems_cookie)) + goto retry_cpuset; + /* Reclaim has failed us, start killing things */ page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); if (page) @@ -3707,10 +3725,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, nopage: /* - * When updating a task's mems_allowed, it is possible to race with - * parallel threads in such a way that an allocation can fail while - * the mask is being updated. If a page allocation is about to fail, - * check if the cpuset changed during allocation and if so, retry. + * When updating a task's mems_allowed or mempolicy nodemask, it is + * possible to race with parallel threads in such a way that our + * allocation can fail while the mask is being updated. If we are about + * to fail, check if the cpuset changed during allocation and if so, + * retry. */ if (read_mems_allowed_retry(cpuset_mems_cookie)) goto retry_cpuset; @@ -3801,15 +3820,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, /* * Restore the original nodemask if it was potentially replaced with * &cpuset_current_mems_allowed to optimize the fast-path attempt. - * Also recalculate the starting point for the zonelist iterator or - * we could end up iterating over non-eligible zones endlessly. */ - if (unlikely(ac.nodemask != nodemask)) { + if (unlikely(ac.nodemask != nodemask)) ac.nodemask = nodemask; - ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, - ac.high_zoneidx, ac.nodemask); - /* If we have NULL preferred zone, slowpath wll handle that */ - } page = __alloc_pages_slowpath(alloc_mask, order, &ac); -- GitLab From 13e39d5930ec5a4e0711a6d88bcea84bfd13d4bb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 20 Jan 2017 10:33:32 +0000 Subject: [PATCH 1258/1442] vring: Force use of DMA API for ARM-based systems with legacy devices commit c7070619f3408d9a0dffbed9149e6f00479cf43b upstream. Booting Linux on an ARM fastmodel containing an SMMU emulation results in an unexpected I/O page fault from the legacy virtio-blk PCI device: [ 1.211721] arm-smmu-v3 2b400000.smmu: event 0x10 received: [ 1.211800] arm-smmu-v3 2b400000.smmu: 0x00000000fffff010 [ 1.211880] arm-smmu-v3 2b400000.smmu: 0x0000020800000000 [ 1.211959] arm-smmu-v3 2b400000.smmu: 0x00000008fa081002 [ 1.212075] arm-smmu-v3 2b400000.smmu: 0x0000000000000000 [ 1.212155] arm-smmu-v3 2b400000.smmu: event 0x10 received: [ 1.212234] arm-smmu-v3 2b400000.smmu: 0x00000000fffff010 [ 1.212314] arm-smmu-v3 2b400000.smmu: 0x0000020800000000 [ 1.212394] arm-smmu-v3 2b400000.smmu: 0x00000008fa081000 [ 1.212471] arm-smmu-v3 2b400000.smmu: 0x0000000000000000 This is because the legacy virtio-blk device is behind an SMMU, so we have consequently swizzled its DMA ops and configured the SMMU to translate accesses. This then requires the vring code to use the DMA API to establish translations, otherwise all transactions will result in fatal faults and termination. Given that ARM-based systems only see an SMMU if one is really present (the topology is all described by firmware tables such as device-tree or IORT), then we can safely use the DMA API for all legacy virtio devices. Modern devices can advertise the prescense of an IOMMU using the VIRTIO_F_IOMMU_PLATFORM feature flag. Cc: Andy Lutomirski Cc: Michael S. Tsirkin Fixes: 876945dbf649 ("arm64: Hook up IOMMU dma_ops") Signed-off-by: Will Deacon Signed-off-by: Michael S. Tsirkin Acked-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_ring.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 489bfc61cf30..f1360487a594 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -159,6 +159,13 @@ static bool vring_use_dma_api(struct virtio_device *vdev) if (xen_domain()) return true; + /* + * On ARM-based machines, the DMA ops will do the right thing, + * so always use them with legacy devices. + */ + if (IS_ENABLED(CONFIG_ARM) || IS_ENABLED(CONFIG_ARM64)) + return !virtio_has_feature(vdev, VIRTIO_F_VERSION_1); + return false; } -- GitLab From c7556867782ee8ae0383a1e99df09222f1f01332 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 20 Jan 2017 15:21:35 +0200 Subject: [PATCH 1259/1442] userns: Make ucounts lock irq-safe commit 880a38547ff08715ce4f1daf9a4bb30c87676e68 upstream. The ucounts_lock is being used to protect various ucounts lifecycle management functionalities. However, those services can also be invoked when a pidns is being freed in an RCU callback (e.g. softirq context). This can lead to deadlocks. There were already efforts trying to prevent similar deadlocks in add7c65ca426 ("pid: fix lockdep deadlock warning due to ucount_lock"), however they just moved the context from hardirq to softrq. Fix this issue once and for all by explictly making the lock disable irqs altogether. Dmitry Vyukov reported: > I've got the following deadlock report while running syzkaller fuzzer > on eec0d3d065bfcdf9cd5f56dd2a36b94d12d32297 of linux-next (on odroid > device if it matters): > > ================================= > [ INFO: inconsistent lock state ] > 4.10.0-rc3-next-20170112-xc2-dirty #6 Not tainted > --------------------------------- > inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. > swapper/2/0 [HC0[0]:SC1[1]:HE1:SE0] takes: > (ucounts_lock){+.?...}, at: [< inline >] spin_lock > ./include/linux/spinlock.h:302 > (ucounts_lock){+.?...}, at: [] > put_ucounts+0x60/0x138 kernel/ucount.c:162 > {SOFTIRQ-ON-W} state was registered at: > [] mark_lock+0x220/0xb60 kernel/locking/lockdep.c:3054 > [< inline >] mark_irqflags kernel/locking/lockdep.c:2941 > [] __lock_acquire+0x388/0x3260 kernel/locking/lockdep.c:3295 > [] lock_acquire+0xa4/0x138 kernel/locking/lockdep.c:3753 > [< inline >] __raw_spin_lock ./include/linux/spinlock_api_smp.h:144 > [] _raw_spin_lock+0x90/0xd0 kernel/locking/spinlock.c:151 > [< inline >] spin_lock ./include/linux/spinlock.h:302 > [< inline >] get_ucounts kernel/ucount.c:131 > [] inc_ucount+0x80/0x6c8 kernel/ucount.c:189 > [< inline >] inc_mnt_namespaces fs/namespace.c:2818 > [] alloc_mnt_ns+0x78/0x3a8 fs/namespace.c:2849 > [] create_mnt_ns+0x28/0x200 fs/namespace.c:2959 > [< inline >] init_mount_tree fs/namespace.c:3199 > [] mnt_init+0x258/0x384 fs/namespace.c:3251 > [] vfs_caches_init+0x6c/0x80 fs/dcache.c:3626 > [] start_kernel+0x414/0x460 init/main.c:648 > [] __primary_switched+0x6c/0x70 arch/arm64/kernel/head.S:456 > irq event stamp: 2316924 > hardirqs last enabled at (2316924): [< inline >] rcu_do_batch > kernel/rcu/tree.c:2911 > hardirqs last enabled at (2316924): [< inline >] > invoke_rcu_callbacks kernel/rcu/tree.c:3182 > hardirqs last enabled at (2316924): [< inline >] > __rcu_process_callbacks kernel/rcu/tree.c:3149 > hardirqs last enabled at (2316924): [] > rcu_process_callbacks+0x7a4/0xc28 kernel/rcu/tree.c:3166 > hardirqs last disabled at (2316923): [< inline >] rcu_do_batch > kernel/rcu/tree.c:2900 > hardirqs last disabled at (2316923): [< inline >] > invoke_rcu_callbacks kernel/rcu/tree.c:3182 > hardirqs last disabled at (2316923): [< inline >] > __rcu_process_callbacks kernel/rcu/tree.c:3149 > hardirqs last disabled at (2316923): [] > rcu_process_callbacks+0x210/0xc28 kernel/rcu/tree.c:3166 > softirqs last enabled at (2316912): [] > _local_bh_enable+0x4c/0x80 kernel/softirq.c:155 > softirqs last disabled at (2316913): [< inline >] > do_softirq_own_stack ./include/linux/interrupt.h:488 > softirqs last disabled at (2316913): [< inline >] > invoke_softirq kernel/softirq.c:371 > softirqs last disabled at (2316913): [] > irq_exit+0x264/0x308 kernel/softirq.c:405 > > other info that might help us debug this: > Possible unsafe locking scenario: > > CPU0 > ---- > lock(ucounts_lock); > > lock(ucounts_lock); > > *** DEADLOCK *** > > 1 lock held by swapper/2/0: > #0: (rcu_callback){......}, at: [< inline >] __rcu_reclaim > kernel/rcu/rcu.h:108 > #0: (rcu_callback){......}, at: [< inline >] rcu_do_batch > kernel/rcu/tree.c:2919 > #0: (rcu_callback){......}, at: [< inline >] > invoke_rcu_callbacks kernel/rcu/tree.c:3182 > #0: (rcu_callback){......}, at: [< inline >] > __rcu_process_callbacks kernel/rcu/tree.c:3149 > #0: (rcu_callback){......}, at: [] > rcu_process_callbacks+0x720/0xc28 kernel/rcu/tree.c:3166 > > stack backtrace: > CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.10.0-rc3-next-20170112-xc2-dirty #6 > Hardware name: Hardkernel ODROID-C2 (DT) > Call trace: > [] dump_backtrace+0x0/0x440 arch/arm64/kernel/traps.c:500 > [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:225 > [] dump_stack+0x110/0x168 > [] print_usage_bug.part.27+0x49c/0x4bc > kernel/locking/lockdep.c:2387 > [< inline >] print_usage_bug kernel/locking/lockdep.c:2357 > [< inline >] valid_state kernel/locking/lockdep.c:2400 > [< inline >] mark_lock_irq kernel/locking/lockdep.c:2617 > [] mark_lock+0x934/0xb60 kernel/locking/lockdep.c:3065 > [< inline >] mark_irqflags kernel/locking/lockdep.c:2923 > [] __lock_acquire+0x640/0x3260 kernel/locking/lockdep.c:3295 > [] lock_acquire+0xa4/0x138 kernel/locking/lockdep.c:3753 > [< inline >] __raw_spin_lock ./include/linux/spinlock_api_smp.h:144 > [] _raw_spin_lock+0x90/0xd0 kernel/locking/spinlock.c:151 > [< inline >] spin_lock ./include/linux/spinlock.h:302 > [] put_ucounts+0x60/0x138 kernel/ucount.c:162 > [] dec_ucount+0xf4/0x158 kernel/ucount.c:214 > [< inline >] dec_pid_namespaces kernel/pid_namespace.c:89 > [] delayed_free_pidns+0x40/0xe0 kernel/pid_namespace.c:156 > [< inline >] __rcu_reclaim kernel/rcu/rcu.h:118 > [< inline >] rcu_do_batch kernel/rcu/tree.c:2919 > [< inline >] invoke_rcu_callbacks kernel/rcu/tree.c:3182 > [< inline >] __rcu_process_callbacks kernel/rcu/tree.c:3149 > [] rcu_process_callbacks+0x768/0xc28 kernel/rcu/tree.c:3166 > [] __do_softirq+0x324/0x6e0 kernel/softirq.c:284 > [< inline >] do_softirq_own_stack ./include/linux/interrupt.h:488 > [< inline >] invoke_softirq kernel/softirq.c:371 > [] irq_exit+0x264/0x308 kernel/softirq.c:405 > [] __handle_domain_irq+0xc0/0x150 kernel/irq/irqdesc.c:636 > [] gic_handle_irq+0x68/0xd8 > Exception stack(0xffff8000648e7dd0 to 0xffff8000648e7f00) > 7dc0: ffff8000648d4b3c 0000000000000007 > 7de0: 0000000000000000 1ffff0000c91a967 1ffff0000c91a967 1ffff0000c91a967 > 7e00: ffff20000a4b6b68 0000000000000001 0000000000000007 0000000000000001 > 7e20: 1fffe4000149ae90 ffff200009d35000 0000000000000000 0000000000000002 > 7e40: 0000000000000000 0000000000000000 0000000002624a1a 0000000000000000 > 7e60: 0000000000000000 ffff200009cbcd88 000060006d2ed000 0000000000000140 > 7e80: ffff200009cff000 ffff200009cb6000 ffff200009cc2020 ffff200009d2159d > 7ea0: 0000000000000000 ffff8000648d4380 0000000000000000 ffff8000648e7f00 > 7ec0: ffff20000820a478 ffff8000648e7f00 ffff20000820a47c 0000000010000145 > 7ee0: 0000000000000140 dfff200000000000 ffffffffffffffff ffff20000820a478 > [] el1_irq+0xb8/0x130 arch/arm64/kernel/entry.S:486 > [< inline >] arch_local_irq_restore > ./arch/arm64/include/asm/irqflags.h:81 > [] rcu_idle_exit+0x64/0xa8 kernel/rcu/tree.c:1030 > [< inline >] cpuidle_idle_call kernel/sched/idle.c:200 > [] do_idle+0x1dc/0x2d0 kernel/sched/idle.c:243 > [] cpu_startup_entry+0x24/0x28 kernel/sched/idle.c:345 > [] secondary_start_kernel+0x2cc/0x358 > arch/arm64/kernel/smp.c:276 > [<000000000279f1a4>] 0x279f1a4 Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Fixes: add7c65ca426 ("pid: fix lockdep deadlock warning due to ucount_lock") Fixes: f333c700c610 ("pidns: Add a limit on the number of pid namespaces") Link: https://www.spinics.net/lists/kernel/msg2426637.html Signed-off-by: Nikolay Borisov Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- kernel/ucount.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 9d20d5dd298a..4bbd38ec3788 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -128,10 +128,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; - spin_lock(&ucounts_lock); + spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); if (!ucounts) { - spin_unlock(&ucounts_lock); + spin_unlock_irq(&ucounts_lock); new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) @@ -141,7 +141,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->uid = uid; atomic_set(&new->count, 0); - spin_lock(&ucounts_lock); + spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); if (ucounts) { kfree(new); @@ -152,16 +152,18 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) } if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) ucounts = NULL; - spin_unlock(&ucounts_lock); + spin_unlock_irq(&ucounts_lock); return ucounts; } static void put_ucounts(struct ucounts *ucounts) { + unsigned long flags; + if (atomic_dec_and_test(&ucounts->count)) { - spin_lock(&ucounts_lock); + spin_lock_irqsave(&ucounts_lock, flags); hlist_del_init(&ucounts->node); - spin_unlock(&ucounts_lock); + spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); } -- GitLab From 03707d6c36f9c7355c22e43039c09804e96a02b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Jan 2017 18:20:55 -0800 Subject: [PATCH 1260/1442] sysctl: fix proc_doulongvec_ms_jiffies_minmax() commit ff9f8a7cf935468a94d9927c68b00daae701667e upstream. We perform the conversion between kernel jiffies and ms only when exporting kernel value to user space. We need to do the opposite operation when value is written by user. Only matters when HZ != 1000 Signed-off-by: Eric Dumazet Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 706309f9ed84..c1095cdc0fe2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2487,6 +2487,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int break; if (neg) continue; + val = convmul * val / convdiv; if ((min && val < *min) || (max && val > *max)) continue; *i = val; -- GitLab From 4859524143609fee04d69a8069f9a83317e4bb7c Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 26 Jan 2017 13:18:09 -0800 Subject: [PATCH 1261/1442] xfs: prevent quotacheck from overloading inode lru commit e0d76fa4475ef2cf4b52d18588b8ce95153d021b upstream. Quotacheck runs at mount time in situations where quota accounting must be recalculated. In doing so, it uses bulkstat to visit every inode in the filesystem. Historically, every inode processed during quotacheck was released and immediately tagged for reclaim because quotacheck runs before the superblock is marked active by the VFS. In other words, the final iput() lead to an immediate ->destroy_inode() call, which allowed the XFS background reclaim worker to start reclaiming inodes. Commit 17c12bcd3 ("xfs: when replaying bmap operations, don't let unlinked inodes get reaped") marks the XFS superblock active sooner as part of the mount process to support caching inodes processed during log recovery. This occurs before quotacheck and thus means all inodes processed by quotacheck are inserted to the LRU on release. The s_umount lock is held until the mount has completed and thus prevents the shrinkers from operating on the sb. This means that quotacheck can excessively populate the inode LRU and lead to OOM conditions on systems without sufficient RAM. Update the quotacheck bulkstat handler to set XFS_IGET_DONTCACHE on inodes processed by quotacheck. This causes ->drop_inode() to return 1 and in turn causes iput_final() to evict the inode. This preserves the original quotacheck behavior and prevents it from overloading the LRU and running out of memory. Reported-by: Martin Svec Signed-off-by: Brian Foster Reviewed-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_qm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 45e50ea90769..b669b123287b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL, + &ip); if (error) { *res = BULKSTAT_RV_NOTHING; return error; -- GitLab From 959f9709c0251275f05fd9092ff25e9b9bec237e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Jan 2017 13:32:14 +0100 Subject: [PATCH 1262/1442] ISDN: eicon: silence misleading array-bounds warning commit 950eabbd6ddedc1b08350b9169a6a51b130ebaaf upstream. With some gcc versions, we get a warning about the eicon driver, and that currently shows up as the only remaining warning in one of the build bots: In file included from ../drivers/isdn/hardware/eicon/message.c:30:0: eicon/message.c: In function 'mixer_notify_update': eicon/platform.h:333:18: warning: array subscript is above array bounds [-Warray-bounds] The code is easily changed to open-code the unusual PUT_WORD() line causing this to avoid the warning. Link: http://arm-soc.lixom.net/buildlogs/stable-rc/v4.4.45/ Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/isdn/hardware/eicon/message.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c index 1a1d99704fe6..296f1411fe84 100644 --- a/drivers/isdn/hardware/eicon/message.c +++ b/drivers/isdn/hardware/eicon/message.c @@ -11297,7 +11297,8 @@ static void mixer_notify_update(PLCI *plci, byte others) ((CAPI_MSG *) msg)->header.ncci = 0; ((CAPI_MSG *) msg)->info.facility_req.Selector = SELECTOR_LINE_INTERCONNECT; ((CAPI_MSG *) msg)->info.facility_req.structs[0] = 3; - PUT_WORD(&(((CAPI_MSG *) msg)->info.facility_req.structs[1]), LI_REQ_SILENT_UPDATE); + ((CAPI_MSG *) msg)->info.facility_req.structs[1] = LI_REQ_SILENT_UPDATE & 0xff; + ((CAPI_MSG *) msg)->info.facility_req.structs[2] = LI_REQ_SILENT_UPDATE >> 8; ((CAPI_MSG *) msg)->info.facility_req.structs[3] = 0; w = api_put(notify_plci->appl, (CAPI_MSG *) msg); if (w != _QUEUE_FULL) -- GitLab From 79babd4a6ce26d6b0e6a56da47efb723431abc70 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 25 Jan 2017 17:06:38 -0800 Subject: [PATCH 1263/1442] Btrfs: remove old tree_root case in btrfs_read_locked_inode() commit 67ade058ef2c65a3e56878af9c293ec76722a2e5 upstream. As Jeff explained in c2951f32d36c ("btrfs: remove old tree_root dirent processing in btrfs_real_readdir()"), supporting this old format is no longer necessary since the Btrfs magic number has been updated since we changed to the current format. There are other places where we still handle this old format, but since this is part of a fix that is going to stable, I'm only removing this one for now. Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8e3a5a266917..e70b7c695592 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3819,10 +3819,7 @@ static int btrfs_read_locked_inode(struct inode *inode) break; case S_IFDIR: inode->i_fop = &btrfs_dir_file_operations; - if (root == root->fs_info->tree_root) - inode->i_op = &btrfs_dir_ro_inode_operations; - else - inode->i_op = &btrfs_dir_inode_operations; + inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: inode->i_op = &btrfs_symlink_inode_operations; -- GitLab From ad80fada9d6d8177d1593a9b5772e80a758db595 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 25 Jan 2017 17:06:39 -0800 Subject: [PATCH 1264/1442] Btrfs: disable xattr operations on subvolume directories commit 1fdf41941b8010691679638f8d0c8d08cfee7726 upstream. When you snapshot a subvolume containing a subvolume, you get a placeholder directory where the subvolume would be. These directory inodes have ->i_ops set to btrfs_dir_ro_inode_operations. Previously, these i_ops didn't include the xattr operation callbacks. The conversion to xattr_handlers missed this case, leading to bogus attempts to set xattrs on these inodes. This manifested itself as failures when running delayed inodes. To fix this, clear IOP_XATTR in ->i_opflags on these inodes. Fixes: 6c6ef9f26e59 ("xattr: Stop calling {get,set,remove}xattr inode operations") Cc: Andreas Gruenbacher Reported-by: Chris Murphy Tested-by: Chris Murphy Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e70b7c695592..68fed83dc742 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5679,6 +5679,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; inode->i_op = &btrfs_dir_ro_inode_operations; + inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; inode->i_mtime = current_time(inode); -- GitLab From ffb97c11d05fea9a3ff29ad1d9e9c854e0a06dc2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 25 Jan 2017 17:06:40 -0800 Subject: [PATCH 1265/1442] Btrfs: remove ->{get, set}_acl() from btrfs_dir_ro_inode_operations commit 57b59ed2e5b91e958843609c7884794e29e6c4cb upstream. Subvolume directory inodes can't have ACLs. Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 68fed83dc742..be4da91d880f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10585,8 +10585,6 @@ static const struct inode_operations btrfs_dir_inode_operations = { static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, - .get_acl = btrfs_get_acl, - .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, }; -- GitLab From 97a2e39b7ab93008c8bc432675ee36ad3de3fce1 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 15 Jan 2017 20:15:00 +0200 Subject: [PATCH 1266/1442] RDMA/cma: Fix unknown symbol when CONFIG_IPV6 is not enabled commit b4cfe3971f6eab542dd7ecc398bfa1aeec889934 upstream. If IPV6 has not been enabled in the underlying kernel, we must avoid calling IPV6 procedures in rdma_cm.ko. This requires using "IS_ENABLED(CONFIG_IPV6)" in "if" statements surrounding any code which calls external IPV6 procedures. In the instance fixed here, procedure cma_bind_addr() called ipv6_addr_type() -- which resulted in calling external procedure __ipv6_addr_type(). Fixes: 6c26a77124ff ("RDMA/cma: fix IPv6 address resolution") Cc: Spencer Baugh Signed-off-by: Jack Morgenstein Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2a6fc47a1dfb..c25768c2dd3b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2768,7 +2768,8 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; src_addr->sa_family = dst_addr->sa_family; - if (dst_addr->sa_family == AF_INET6) { + if (IS_ENABLED(CONFIG_IPV6) && + dst_addr->sa_family == AF_INET6) { struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; -- GitLab From 62d7f2123f19a7e81c53bcd08f2902cd54c52b64 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 23 Jan 2017 22:59:44 +0100 Subject: [PATCH 1267/1442] s390/mm: Fix cmma unused transfer from pgste into pte commit 0d6da872d3e4a60f43c295386d7ff9a4cdcd57e9 upstream. The last pgtable rework silently disabled the CMMA unused state by setting a local pte variable (a parameter) instead of propagating it back into the caller. Fix it. Fixes: ebde765c0e85 ("s390/mm: uninline ptep_xxx functions from pgtable.h") Cc: Martin Schwidefsky Cc: Claudio Imbrenda Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/pgtable.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 7a1897c51c54..d56ef26d4681 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -202,7 +202,7 @@ static inline pgste_t ptep_xchg_start(struct mm_struct *mm, return pgste; } -static inline void ptep_xchg_commit(struct mm_struct *mm, +static inline pte_t ptep_xchg_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pgste_t pgste, pte_t old, pte_t new) { @@ -220,6 +220,7 @@ static inline void ptep_xchg_commit(struct mm_struct *mm, } else { *ptep = new; } + return old; } pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, @@ -231,7 +232,7 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_direct(mm, addr, ptep); - ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; } @@ -246,7 +247,7 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); - ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new); preempt_enable(); return old; } -- GitLab From d7f56ee1198a1d0657368abc4dcd10ae109a8433 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Jan 2017 08:05:52 +0100 Subject: [PATCH 1268/1442] s390/ptrace: Preserve previous registers for short regset write commit 9dce990d2cf57b5ed4e71a9cdbd7eae4335111ff upstream. Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. convert_vx_to_fp() is adapted to handle only a specified number of registers rather than unconditionally handling all of them: other callers of this function are adapted appropriately. Based on an initial patch by Dave Martin. Reported-by: Dave Martin Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/ptrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9336e824e2db..fc2974b929c3 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -963,6 +963,11 @@ static int s390_fpregs_set(struct task_struct *target, if (target == current) save_fpu_regs(); + if (MACHINE_HAS_VX) + convert_vx_to_fp(fprs, target->thread.fpu.vxrs); + else + memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs)); + /* If setting FPC, must validate it first. */ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { u32 ufpc[2] = { target->thread.fpu.fpc, 0 }; @@ -1067,6 +1072,9 @@ static int s390_vxrs_low_set(struct task_struct *target, if (target == current) save_fpu_regs(); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); if (rc == 0) for (i = 0; i < __NUM_VXRS_LOW; i++) -- GitLab From 95600605ffaa644d3f6fce55faf40ec12cec7855 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sun, 22 Jan 2017 14:41:22 +0100 Subject: [PATCH 1269/1442] IB/cxgb3: fix misspelling in header guard commit b1a27eac7fefff33ccf6acc919fc0725bf9815fb upstream. Use CXGB3_... instead of CXBG3_... Fixes: a85fb3383340 ("IB/cxgb3: Move user vendor structures") Signed-off-by: Nicolas Iooss Reviewed-by: Leon Romanovsky Acked-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- include/uapi/rdma/cxgb3-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h index 48a19bda071b..d24eee12128f 100644 --- a/include/uapi/rdma/cxgb3-abi.h +++ b/include/uapi/rdma/cxgb3-abi.h @@ -30,7 +30,7 @@ * SOFTWARE. */ #ifndef CXGB3_ABI_USER_H -#define CXBG3_ABI_USER_H +#define CXGB3_ABI_USER_H #include -- GitLab From 24be606cd3b433412488ce962842489fe6e34a7a Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 18 Jan 2017 00:40:39 +0200 Subject: [PATCH 1270/1442] IB/iser: Fix sg_tablesize calculation commit 1e5db6c31ade4150c2e2b1a21e39f776c38fea39 upstream. For devices that can register page list that is bigger than USHRT_MAX, we actually take the wrong value for sg_tablesize. E.g: for CX4 max_fast_reg_page_list_len is 65536 (bigger than USHRT_MAX) so we set sg_tablesize to 0 by mistake. Therefore, each IO that is bigger than 4k splitted to "< 4k" chunks that cause performance degredation. Remove wrong sg_tablesize assignment, and use the value that was set during address resolution handler with the needed casting. Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/iser/iscsi_iser.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 64b3d11dcf1e..140f3f354cf3 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -651,13 +651,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, SHOST_DIX_GUARD_CRC); } - /* - * Limit the sg_tablesize and max_sectors based on the device - * max fastreg page list length. - */ - shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize, - ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len); - if (iscsi_host_add(shost, ib_conn->device->ib_device->dma_device)) { mutex_unlock(&iser_conn->state_mutex); -- GitLab From c2293e76babeebea894f584c11a07290b4f701da Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 28 Dec 2016 12:48:28 +0200 Subject: [PATCH 1271/1442] IB/srp: fix mr allocation when the device supports sg gaps commit ad8e66b4a80182174f73487ed25fd2140cf43361 upstream. If the device support arbitrary sg list mapping (device cap IB_DEVICE_SG_GAPS_REG set) we allocate the memory regions with IB_MR_TYPE_SG_GAPS. Fixes: 509c5f33f4f6 ("IB/srp: Prevent mapping failures") Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Leon Romanovsky Reviewed-by: Mark Bloch Reviewed-by: Yuval Shaia Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d980fb458ad4..69d2d6fb73c1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -366,6 +366,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, struct srp_fr_desc *d; struct ib_mr *mr; int i, ret = -EINVAL; + enum ib_mr_type mr_type; if (pool_size <= 0) goto err; @@ -379,9 +380,13 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->free_list); + if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + mr_type = IB_MR_TYPE_SG_GAPS; + else + mr_type = IB_MR_TYPE_MEM_REG; + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { - mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, - max_page_list_len); + mr = ib_alloc_mr(pd, mr_type, max_page_list_len); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto destroy_pool; -- GitLab From a1af471b4151176e26330d3c4a9589711866e2b1 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 4 Jan 2017 15:59:37 +0200 Subject: [PATCH 1272/1442] IB/srp: fix invalid indirect_sg_entries parameter value commit 0a475ef4226e305bdcffe12b401ca1eab06c4913 upstream. After setting indirect_sg_entries module_param to huge value (e.g 500,000), srp_alloc_req_data() fails to allocate indirect descriptors for the request ring (kmalloc fails). This commit enforces the maximum value of indirect_sg_entries to be SG_MAX_SEGMENTS as signified in module param description. Fixes: 65e8617fba17 (scsi: rename SCSI_MAX_{SG, SG_CHAIN}_SEGMENTS) Fixes: c07d424d6118 (IB/srp: add support for indirect tables that don't fit in SRP_CMD) Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Laurence Oberman Reviewed-by: Bart Van Assche -- Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/srp/ib_srp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 69d2d6fb73c1..e7dcf14a76e2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3683,6 +3683,12 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + if (indirect_sg_entries > SG_MAX_SEGMENTS) { + pr_warn("Clamping indirect_sg_entries to %u\n", + SG_MAX_SEGMENTS); + indirect_sg_entries = SG_MAX_SEGMENTS; + } + srp_remove_wq = create_workqueue("srp_remove"); if (!srp_remove_wq) { ret = -ENOMEM; -- GitLab From 9f56548b007769191f7dff7585eb317369991c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Einar=20J=C3=B3n?= Date: Fri, 12 Aug 2016 13:50:41 +0200 Subject: [PATCH 1273/1442] can: c_can_pci: fix null-pointer-deref in c_can_start() - set device pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c97c52be78b8463ac5407f1cf1f22f8f6cf93a37 upstream. The priv->device pointer for c_can_pci is never set, but it is used without a NULL check in c_can_start(). Setting it in c_can_pci_probe() like c_can_plat_probe() prevents c_can_pci.ko from crashing, with and without CONFIG_PM. This might also cause the pm_runtime_*() functions in c_can.c to actually be executed for c_can_pci devices - they are the only other place where priv->device is used, but they all contain a null check. Signed-off-by: Einar Jón Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/c_can/c_can_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c index 7be393c96b1a..cf7c18947189 100644 --- a/drivers/net/can/c_can/c_can_pci.c +++ b/drivers/net/can/c_can/c_can_pci.c @@ -161,6 +161,7 @@ static int c_can_pci_probe(struct pci_dev *pdev, dev->irq = pdev->irq; priv->base = addr; + priv->device = &pdev->dev; if (!c_can_pci_data->freq) { dev_err(&pdev->dev, "no clock frequency defined\n"); -- GitLab From 50f5972cc23179e90e8452998bcb609694a9d927 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Wed, 18 Jan 2017 11:35:57 +0100 Subject: [PATCH 1274/1442] can: ti_hecc: add missing prepare and unprepare of the clock commit befa60113ce7ea270cb51eada28443ca2756f480 upstream. In order to make the driver work with the common clock framework, this patch converts the clk_enable()/clk_disable() to clk_prepare_enable()/clk_disable_unprepare(). Also add error checking for clk_prepare_enable(). Signed-off-by: Yegor Yefremov Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/ti_hecc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 680d1ff07a55..6749b1829469 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -948,7 +948,12 @@ static int ti_hecc_probe(struct platform_device *pdev) netif_napi_add(ndev, &priv->napi, ti_hecc_rx_poll, HECC_DEF_NAPI_WEIGHT); - clk_enable(priv->clk); + err = clk_prepare_enable(priv->clk); + if (err) { + dev_err(&pdev->dev, "clk_prepare_enable() failed\n"); + goto probe_exit_clk; + } + err = register_candev(ndev); if (err) { dev_err(&pdev->dev, "register_candev() failed\n"); @@ -981,7 +986,7 @@ static int ti_hecc_remove(struct platform_device *pdev) struct ti_hecc_priv *priv = netdev_priv(ndev); unregister_candev(ndev); - clk_disable(priv->clk); + clk_disable_unprepare(priv->clk); clk_put(priv->clk); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); iounmap(priv->base); @@ -1006,7 +1011,7 @@ static int ti_hecc_suspend(struct platform_device *pdev, pm_message_t state) hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_PDR); priv->can.state = CAN_STATE_SLEEPING; - clk_disable(priv->clk); + clk_disable_unprepare(priv->clk); return 0; } @@ -1015,8 +1020,11 @@ static int ti_hecc_resume(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct ti_hecc_priv *priv = netdev_priv(dev); + int err; - clk_enable(priv->clk); + err = clk_prepare_enable(priv->clk); + if (err) + return err; hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_PDR); priv->can.state = CAN_STATE_ERROR_ACTIVE; -- GitLab From 9d5f2c151ec01949e4cc02333bc843b128110f55 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 24 Jan 2017 10:23:42 -0800 Subject: [PATCH 1275/1442] ARC: udelay: fix inline assembler by adding LP_COUNT to clobber list commit 36425cd67052e3becf325fd4d3ba5691791ef7e4 upstream. commit 3c7c7a2fc8811bc ("ARC: Don't use "+l" inline asm constraint") modified the inline assembly to setup LP_COUNT register manually and NOT rely on gcc to do it (with the +l inline assembler contraint hint, now being retired in the compiler) However the fix was flawed as we didn't add LP_COUNT to asm clobber list, meaning gcc doesn't know that LP_COUNT or zero-delay-loops are in action in the inline asm. This resulted in some fun - as nested ZOL loops were being generared | mov lp_count,250000 ;16 # tmp235, | lp .L__GCC__LP14 # <======= OUTER LOOP (gcc generated) | .L14: | ld r2, [r5] # MEM[(volatile u32 *)prephitmp_43], w | dmb 1 | breq r2, -1, @.L21 #, w,, | bbit0 r2,1,@.L13 # w,, | ld r4,[r7] ;25 # loops_per_jiffy, loops_per_jiffy | mpymu r3,r4,r6 #, loops_per_jiffy, tmp234 | | mov lp_count, r3 # <====== INNER LOOP (from inline asm) | lp 1f | nop | 1: | nop_s | .L__GCC__LP14: ; loop end, start is @.L14 #, This caused issues with drivers relying on sane behaviour of udelay friends. With LP_COUNT added to clobber list, gcc doesn't generate the outer loop in say above case. Addresses STAR 9001146134 Reported-by: Joao Pinto Fixes: 3c7c7a2fc8811bc ("ARC: Don't use "+l" inline asm constraint") Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/delay.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index a36e8601114d..d5da2115d78a 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -26,7 +26,9 @@ static inline void __delay(unsigned long loops) " lp 1f \n" " nop \n" "1: \n" - : : "r"(loops)); + : + : "r"(loops) + : "lp_count"); } extern void __bad_udelay(void); -- GitLab From ca332b96ba623aa3243a79d355684974f88c3f1a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 27 Jan 2017 10:45:27 -0800 Subject: [PATCH 1276/1442] ARC: [arcompact] handle unaligned access delay slot corner case commit 9aed02feae57bf7a40cb04ea0e3017cb7a998db4 upstream. After emulating an unaligned access in delay slot of a branch, we pretend as the delay slot never happened - so return back to actual branch target (or next PC if branch was not taken). Curently we did this by handling STATUS32.DE, we also need to clear the BTA.T bit, which is disregarded when returning from original misaligned exception, but could cause weirdness if it took the interrupt return path (in case interrupt was acive too) One ARC700 customer ran into this when enabling unaligned access fixup for kernel mode accesses as well Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/unaligned.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index abd961f3e763..91ebe382147f 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -241,8 +241,9 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, if (state.fault) goto fault; + /* clear any remanants of delay slot */ if (delay_mode(regs)) { - regs->ret = regs->bta; + regs->ret = regs->bta ~1U; regs->status32 &= ~STATUS_DE_MASK; } else { regs->ret += state.instr_len; -- GitLab From 2b95f1210e5010feb767edefcfff53f2c611c1c5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 28 Jan 2017 11:52:02 +0100 Subject: [PATCH 1277/1442] parisc: Don't use BITS_PER_LONG in userspace-exported swab.h header commit 2ad5d52d42810bed95100a3d912679d8864421ec upstream. In swab.h the "#if BITS_PER_LONG > 32" breaks compiling userspace programs if BITS_PER_LONG is #defined by userspace with the sizeof() compiler builtin. Solve this problem by using __BITS_PER_LONG instead. Since we now #include asm/bitsperlong.h avoid further potential userspace pollution by moving the #define of SHIFT_PER_LONG to bitops.h which is not exported to userspace. This patch unbreaks compiling qemu on hppa/parisc. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/bitops.h | 8 +++++++- arch/parisc/include/uapi/asm/bitsperlong.h | 2 -- arch/parisc/include/uapi/asm/swab.h | 5 +++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index 3f9406d9b9d6..da87943328a5 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -6,7 +6,7 @@ #endif #include -#include /* for BITS_PER_LONG/SHIFT_PER_LONG */ +#include #include #include #include @@ -17,6 +17,12 @@ * to include/asm-i386/bitops.h or kerneldoc */ +#if __BITS_PER_LONG == 64 +#define SHIFT_PER_LONG 6 +#else +#define SHIFT_PER_LONG 5 +#endif + #define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1)) diff --git a/arch/parisc/include/uapi/asm/bitsperlong.h b/arch/parisc/include/uapi/asm/bitsperlong.h index e0a23c7bdd43..07fa7e50bdc0 100644 --- a/arch/parisc/include/uapi/asm/bitsperlong.h +++ b/arch/parisc/include/uapi/asm/bitsperlong.h @@ -3,10 +3,8 @@ #if defined(__LP64__) #define __BITS_PER_LONG 64 -#define SHIFT_PER_LONG 6 #else #define __BITS_PER_LONG 32 -#define SHIFT_PER_LONG 5 #endif #include diff --git a/arch/parisc/include/uapi/asm/swab.h b/arch/parisc/include/uapi/asm/swab.h index e78403b129ef..928e1bbac98f 100644 --- a/arch/parisc/include/uapi/asm/swab.h +++ b/arch/parisc/include/uapi/asm/swab.h @@ -1,6 +1,7 @@ #ifndef _PARISC_SWAB_H #define _PARISC_SWAB_H +#include #include #include @@ -38,7 +39,7 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 x) } #define __arch_swab32 __arch_swab32 -#if BITS_PER_LONG > 32 +#if __BITS_PER_LONG > 32 /* ** From "PA-RISC 2.0 Architecture", HP Professional Books. ** See Appendix I page 8 , "Endian Byte Swapping". @@ -61,6 +62,6 @@ static inline __attribute_const__ __u64 __arch_swab64(__u64 x) return x; } #define __arch_swab64 __arch_swab64 -#endif /* BITS_PER_LONG > 32 */ +#endif /* __BITS_PER_LONG > 32 */ #endif /* _PARISC_SWAB_H */ -- GitLab From 73fdda3b01cd76c6570a4146bfdc6703cfce73ee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 22 Jan 2017 14:04:29 -0500 Subject: [PATCH 1278/1442] nfs: Don't increment lock sequence ID after NFS4ERR_MOVED commit 059aa734824165507c65fd30a55ff000afd14983 upstream. Xuan Qi reports that the Linux NFSv4 client failed to lock a file that was migrated. The steps he observed on the wire: 1. The client sent a LOCK request to the source server 2. The source server replied NFS4ERR_MOVED 3. The client switched to the destination server 4. The client sent the same LOCK request to the destination server with a bumped lock sequence ID 5. The destination server rejected the LOCK request with NFS4ERR_BAD_SEQID RFC 3530 section 8.1.5 provides a list of NFS errors which do not bump a lock sequence ID. However, RFC 3530 is now obsoleted by RFC 7530. In RFC 7530 section 9.1.7, this list has been updated by the addition of NFS4ERR_MOVED. Reported-by: Xuan Qi Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- include/linux/nfs4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 9094faf0699d..039e76e91896 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -282,7 +282,7 @@ enum nfsstat4 { static inline bool seqid_mutating_err(u32 err) { - /* rfc 3530 section 8.1.5: */ + /* See RFC 7530, section 9.1.7 */ switch (err) { case NFS4ERR_STALE_CLIENTID: case NFS4ERR_STALE_STATEID: @@ -291,6 +291,7 @@ static inline bool seqid_mutating_err(u32 err) case NFS4ERR_BADXDR: case NFS4ERR_RESOURCE: case NFS4ERR_NOFILEHANDLE: + case NFS4ERR_MOVED: return false; }; return true; -- GitLab From 0a7023506112ea7c634dffe08683d5d90f52eec6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 23 Jan 2017 22:44:12 -0500 Subject: [PATCH 1279/1442] NFSv4.1: Fix a deadlock in layoutget commit 8ac092519ad91931c96d306c4bfae2c6587c325f upstream. We cannot call nfs4_handle_exception() without first ensuring that the slot has been freed. If not, we end up deadlocking with the process waiting for recovery to complete, and recovery waiting for the slot table to drain. Fixes: 2e80dbe7ac51 ("NFSv4.1: Close callback races for OPEN, LAYOUTGET...") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 241da19b7da4..e475a3d13f27 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8371,6 +8371,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } + nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) -- GitLab From 5637949edb50c54b3074f76b648d7f873d8a6814 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 24 Jan 2017 11:34:20 -0500 Subject: [PATCH 1280/1442] NFSv4.0: always send mode in SETATTR after EXCLUSIVE4 commit a430607b2ef7c3be090f88c71cfcb1b3988aa7c0 upstream. Some nfsv4.0 servers may return a mode for the verifier following an open with EXCLUSIVE4 createmode, but this does not mean the client should skip setting the mode in the following SETATTR. It should only do that for EXCLUSIVE4_1 or UNGAURDED createmode. Fixes: 5334c5bdac92 ("NFS: Send attributes in OPEN request for NFS4_CREATE_EXCLUSIVE4_1") Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e475a3d13f27..78ff8b63d5f7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2678,7 +2678,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, sattr->ia_valid |= ATTR_MTIME; /* Except MODE, it seems harmless of setting twice. */ - if ((attrset[1] & FATTR4_WORD1_MODE)) + if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && + attrset[1] & FATTR4_WORD1_MODE) sattr->ia_valid &= ~ATTR_MODE; if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) -- GitLab From cb1d48f55a6dd1ad04caec1c140c2a136eb99206 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 20 Jan 2017 16:48:39 +0800 Subject: [PATCH 1281/1442] SUNRPC: cleanup ida information when removing sunrpc module commit c929ea0b910355e1876c64431f3d5802f95b3d75 upstream. After removing sunrpc module, I get many kmemleak information as, unreferenced object 0xffff88003316b1e0 (size 544): comm "gssproxy", pid 2148, jiffies 4294794465 (age 4200.081s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc+0x15e/0x1f0 [] ida_pre_get+0xaa/0x150 [] ida_simple_get+0xad/0x180 [] nlmsvc_lookup_host+0x4ab/0x7f0 [lockd] [] lockd+0x4d/0x270 [lockd] [] param_set_timeout+0x55/0x100 [lockd] [] svc_defer+0x114/0x3f0 [sunrpc] [] svc_defer+0x2d7/0x3f0 [sunrpc] [] rpc_show_info+0x8a/0x110 [sunrpc] [] proc_reg_write+0x7f/0xc0 [] __vfs_write+0xdf/0x3c0 [] vfs_write+0xef/0x240 [] SyS_write+0xad/0x130 [] entry_SYSCALL_64_fastpath+0x1a/0xa9 [] 0xffffffffffffffff I found, the ida information (dynamic memory) isn't cleanup. Signed-off-by: Kinglong Mee Fixes: 2f048db4680a ("SUNRPC: Add an identifier for struct rpc_clnt") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 5 +++++ net/sunrpc/sunrpc_syms.c | 1 + 3 files changed, 7 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 85cc819676e8..333ad11b3dd9 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -216,5 +216,6 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); +void rpc_cleanup_clids(void); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 62a482790937..b2ae4f150ec6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -336,6 +336,11 @@ static int rpc_client_register(struct rpc_clnt *clnt, static DEFINE_IDA(rpc_clids); +void rpc_cleanup_clids(void) +{ + ida_destroy(&rpc_clids); +} + static int rpc_alloc_clid(struct rpc_clnt *clnt) { int clid; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index ee5d3d253102..3142f38d1104 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -119,6 +119,7 @@ init_sunrpc(void) static void __exit cleanup_sunrpc(void) { + rpc_cleanup_clids(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); -- GitLab From 0bd3cb8d470b7fdf0fbf677fcc19da915e740edc Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 22 Dec 2016 07:40:36 -0800 Subject: [PATCH 1282/1442] iw_cxgb4: free EQ queue memory on last deref commit c12a67fec8d99bb554e8d4e99120d418f1a39c87 upstream. Commit ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting the last deref") introduced a bug where the RDMA QP EQ queue memory (and QIDs) are possibly freed before the underlying connection has been fully shutdown. The result being a possible DMA read issued by HW after the queue memory has been unmapped and freed. This results in possible WR corruption in the worst case, system bus errors if an IOMMU is in use, and SGE "bad WR" errors reported in the very least. The fix is to defer unmap/free of queue memory and QID resources until the QP struct has been fully dereferenced. To do this, the c4iw_ucontext must also be kept around until the last QP that references it is fully freed. In addition, since the last QP deref can happen in an IRQ disabled context, we need a new workqueue thread to do the final unmap/free of the EQ queue memory. Fixes: ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting the last deref") Signed-off-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/device.c | 9 +++++++ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 18 +++++++++++++ drivers/infiniband/hw/cxgb4/provider.c | 20 ++++++++++++--- drivers/infiniband/hw/cxgb4/qp.c | 35 +++++++++++++++++++------- 4 files changed, 69 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index b99dc9e0ffb2..b85a1a983e07 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -848,9 +848,17 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) } } + rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free"); + if (!rdev->free_workq) { + err = -ENOMEM; + goto err_free_status_page; + } + rdev->status_page->db_off = 0; return 0; +err_free_status_page: + free_page((unsigned long)rdev->status_page); destroy_ocqp_pool: c4iw_ocqp_pool_destroy(rdev); destroy_rqtpool: @@ -864,6 +872,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + destroy_workqueue(rdev->free_workq); kfree(rdev->wr_log); free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4788e1a46fde..7d540667dad2 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -45,6 +45,7 @@ #include #include #include +#include #include @@ -107,6 +108,7 @@ struct c4iw_dev_ucontext { struct list_head qpids; struct list_head cqids; struct mutex lock; + struct kref kref; }; enum c4iw_rdev_flags { @@ -183,6 +185,7 @@ struct c4iw_rdev { atomic_t wr_log_idx; struct wr_log_entry *wr_log; int wr_log_size; + struct workqueue_struct *free_workq; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -482,6 +485,8 @@ struct c4iw_qp { int sq_sig_all; struct completion rq_drained; struct completion sq_drained; + struct work_struct free_work; + struct c4iw_ucontext *ucontext; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -495,6 +500,7 @@ struct c4iw_ucontext { u32 key; spinlock_t mmap_lock; struct list_head mmaps; + struct kref kref; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) @@ -502,6 +508,18 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) return container_of(c, struct c4iw_ucontext, ibucontext); } +void _c4iw_free_ucontext(struct kref *kref); + +static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext) +{ + kref_put(&ucontext->kref, _c4iw_free_ucontext); +} + +static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext) +{ + kref_get(&ucontext->kref); +} + struct c4iw_mm_entry { struct list_head entry; u64 addr; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 645e606a17c5..8278ba06f995 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -91,17 +91,28 @@ static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, return -ENOSYS; } -static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +void _c4iw_free_ucontext(struct kref *kref) { - struct c4iw_dev *rhp = to_c4iw_dev(context->device); - struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + struct c4iw_ucontext *ucontext; + struct c4iw_dev *rhp; struct c4iw_mm_entry *mm, *tmp; - PDBG("%s context %p\n", __func__, context); + ucontext = container_of(kref, struct c4iw_ucontext, kref); + rhp = to_c4iw_dev(ucontext->ibucontext.device); + + PDBG("%s ucontext %p\n", __func__, ucontext); list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); kfree(ucontext); +} + +static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +{ + struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + + PDBG("%s context %p\n", __func__, context); + c4iw_put_ucontext(ucontext); return 0; } @@ -125,6 +136,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); + kref_init(&context->kref); if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { if (!warned++) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index b7ac97b27c88..bb0fde6e2047 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -714,13 +714,32 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) return 0; } -static void _free_qp(struct kref *kref) +static void free_qp_work(struct work_struct *work) +{ + struct c4iw_ucontext *ucontext; + struct c4iw_qp *qhp; + struct c4iw_dev *rhp; + + qhp = container_of(work, struct c4iw_qp, free_work); + ucontext = qhp->ucontext; + rhp = qhp->rhp; + + PDBG("%s qhp %p ucontext %p\n", __func__, qhp, ucontext); + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + + if (ucontext) + c4iw_put_ucontext(ucontext); + kfree(qhp); +} + +static void queue_qp_free(struct kref *kref) { struct c4iw_qp *qhp; qhp = container_of(kref, struct c4iw_qp, kref); PDBG("%s qhp %p\n", __func__, qhp); - kfree(qhp); + queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work); } void c4iw_qp_add_ref(struct ib_qp *qp) @@ -732,7 +751,7 @@ void c4iw_qp_add_ref(struct ib_qp *qp) void c4iw_qp_rem_ref(struct ib_qp *qp) { PDBG("%s ib_qp %p\n", __func__, qp); - kref_put(&to_c4iw_qp(qp)->kref, _free_qp); + kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free); } static void add_to_fc_list(struct list_head *head, struct list_head *entry) @@ -1642,7 +1661,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) struct c4iw_dev *rhp; struct c4iw_qp *qhp; struct c4iw_qp_attributes attrs; - struct c4iw_ucontext *ucontext; qhp = to_c4iw_qp(ib_qp); rhp = qhp->rhp; @@ -1662,11 +1680,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) spin_unlock_irq(&rhp->lock); free_ird(rhp, qhp->attr.max_ird); - ucontext = ib_qp->uobject ? - to_c4iw_ucontext(ib_qp->uobject->context) : NULL; - destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); - c4iw_qp_rem_ref(ib_qp); PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); @@ -1767,6 +1780,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); kref_init(&qhp->kref); + INIT_WORK(&qhp->free_work, free_qp_work); ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) @@ -1853,6 +1867,9 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ma_sync_key_mm->len = PAGE_SIZE; insert_mmap(ucontext, ma_sync_key_mm); } + + c4iw_get_ucontext(ucontext); + qhp->ucontext = ucontext; } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); -- GitLab From fe6531075e1dd8a7784bf0450186be1380eafb86 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 15 Dec 2016 19:51:07 -0200 Subject: [PATCH 1283/1442] pctv452e: move buffer to heap, no mutex commit 48775cb73c2e26b7ca9d679875a6e570c8b8e124 upstream. commit 73d5c5c864f4 ("[media] pctv452e: don't do DMA on stack") caused a NULL pointer dereference which occurs when dvb_usb_init() calls dvb_usb_device_power_ctrl() for the first time, before the frontend has been attached. It also caused a recursive deadlock because tt3650_ci_msg_locked() has already locked the mutex. So, partially revert it, but move the buffer to the heap (DMA capable), not to the stack (may not be DMA capable). Instead of sharing one buffer which needs mutex protection, do a new heap allocation for each call. Fixes: commit 73d5c5c864f4 ("[media] pctv452e: don't do DMA on stack") Signed-off-by: Max Kellermann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/pctv452e.c | 133 +++++++++++++++------------ 1 file changed, 72 insertions(+), 61 deletions(-) diff --git a/drivers/media/usb/dvb-usb/pctv452e.c b/drivers/media/usb/dvb-usb/pctv452e.c index 07fa08be9e99..d54ebe7e0215 100644 --- a/drivers/media/usb/dvb-usb/pctv452e.c +++ b/drivers/media/usb/dvb-usb/pctv452e.c @@ -97,14 +97,13 @@ struct pctv452e_state { u8 c; /* transaction counter, wraps around... */ u8 initialized; /* set to 1 if 0x15 has been sent */ u16 last_rc_key; - - unsigned char data[80]; }; static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, unsigned int write_len, unsigned int read_len) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *buf; u8 id; unsigned int rlen; int ret; @@ -114,36 +113,39 @@ static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, return -EIO; } - mutex_lock(&state->ca_mutex); + buf = kmalloc(64, GFP_KERNEL); + if (!buf) + return -ENOMEM; + id = state->c++; - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = cmd; - state->data[3] = write_len; + buf[0] = SYNC_BYTE_OUT; + buf[1] = id; + buf[2] = cmd; + buf[3] = write_len; - memcpy(state->data + 4, data, write_len); + memcpy(buf + 4, data, write_len); rlen = (read_len > 0) ? 64 : 0; - ret = dvb_usb_generic_rw(d, state->data, 4 + write_len, - state->data, rlen, /* delay_ms */ 0); + ret = dvb_usb_generic_rw(d, buf, 4 + write_len, + buf, rlen, /* delay_ms */ 0); if (0 != ret) goto failed; ret = -EIO; - if (SYNC_BYTE_IN != state->data[0] || id != state->data[1]) + if (SYNC_BYTE_IN != buf[0] || id != buf[1]) goto failed; - memcpy(data, state->data + 4, read_len); + memcpy(data, buf + 4, read_len); - mutex_unlock(&state->ca_mutex); + kfree(buf); return 0; failed: err("CI error %d; %02X %02X %02X -> %*ph.", - ret, SYNC_BYTE_OUT, id, cmd, 3, state->data); + ret, SYNC_BYTE_OUT, id, cmd, 3, buf); - mutex_unlock(&state->ca_mutex); + kfree(buf); return ret; } @@ -410,53 +412,57 @@ static int pctv452e_i2c_msg(struct dvb_usb_device *d, u8 addr, u8 *rcv_buf, u8 rcv_len) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *buf; u8 id; int ret; - mutex_lock(&state->ca_mutex); + buf = kmalloc(64, GFP_KERNEL); + if (!buf) + return -ENOMEM; + id = state->c++; ret = -EINVAL; if (snd_len > 64 - 7 || rcv_len > 64 - 7) goto failed; - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = PCTV_CMD_I2C; - state->data[3] = snd_len + 3; - state->data[4] = addr << 1; - state->data[5] = snd_len; - state->data[6] = rcv_len; + buf[0] = SYNC_BYTE_OUT; + buf[1] = id; + buf[2] = PCTV_CMD_I2C; + buf[3] = snd_len + 3; + buf[4] = addr << 1; + buf[5] = snd_len; + buf[6] = rcv_len; - memcpy(state->data + 7, snd_buf, snd_len); + memcpy(buf + 7, snd_buf, snd_len); - ret = dvb_usb_generic_rw(d, state->data, 7 + snd_len, - state->data, /* rcv_len */ 64, + ret = dvb_usb_generic_rw(d, buf, 7 + snd_len, + buf, /* rcv_len */ 64, /* delay_ms */ 0); if (ret < 0) goto failed; /* TT USB protocol error. */ ret = -EIO; - if (SYNC_BYTE_IN != state->data[0] || id != state->data[1]) + if (SYNC_BYTE_IN != buf[0] || id != buf[1]) goto failed; /* I2C device didn't respond as expected. */ ret = -EREMOTEIO; - if (state->data[5] < snd_len || state->data[6] < rcv_len) + if (buf[5] < snd_len || buf[6] < rcv_len) goto failed; - memcpy(rcv_buf, state->data + 7, rcv_len); - mutex_unlock(&state->ca_mutex); + memcpy(rcv_buf, buf + 7, rcv_len); + kfree(buf); return rcv_len; failed: err("I2C error %d; %02X %02X %02X %02X %02X -> %*ph", ret, SYNC_BYTE_OUT, id, addr << 1, snd_len, rcv_len, - 7, state->data); + 7, buf); - mutex_unlock(&state->ca_mutex); + kfree(buf); return ret; } @@ -505,7 +511,7 @@ static u32 pctv452e_i2c_func(struct i2c_adapter *adapter) static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; - u8 *rx; + u8 *b0, *rx; int ret; info("%s: %d\n", __func__, i); @@ -516,11 +522,12 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) if (state->initialized) return 0; - rx = kmalloc(PCTV_ANSWER_LEN, GFP_KERNEL); - if (!rx) + b0 = kmalloc(5 + PCTV_ANSWER_LEN, GFP_KERNEL); + if (!b0) return -ENOMEM; - mutex_lock(&state->ca_mutex); + rx = b0 + 5; + /* hmm where shoud this should go? */ ret = usb_set_interface(d->udev, 0, ISOC_INTERFACE_ALTERNATIVE); if (ret != 0) @@ -528,66 +535,70 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) __func__, ret); /* this is a one-time initialization, dont know where to put */ - state->data[0] = 0xaa; - state->data[1] = state->c++; - state->data[2] = PCTV_CMD_RESET; - state->data[3] = 1; - state->data[4] = 0; + b0[0] = 0xaa; + b0[1] = state->c++; + b0[2] = PCTV_CMD_RESET; + b0[3] = 1; + b0[4] = 0; /* reset board */ - ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b0, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) goto ret; - state->data[1] = state->c++; - state->data[4] = 1; + b0[1] = state->c++; + b0[4] = 1; /* reset board (again?) */ - ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b0, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) goto ret; state->initialized = 1; ret: - mutex_unlock(&state->ca_mutex); - kfree(rx); + kfree(b0); return ret; } static int pctv452e_rc_query(struct dvb_usb_device *d) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; + u8 *b, *rx; int ret, i; u8 id; - mutex_lock(&state->ca_mutex); + b = kmalloc(CMD_BUFFER_SIZE + PCTV_ANSWER_LEN, GFP_KERNEL); + if (!b) + return -ENOMEM; + + rx = b + CMD_BUFFER_SIZE; + id = state->c++; /* prepare command header */ - state->data[0] = SYNC_BYTE_OUT; - state->data[1] = id; - state->data[2] = PCTV_CMD_IR; - state->data[3] = 0; + b[0] = SYNC_BYTE_OUT; + b[1] = id; + b[2] = PCTV_CMD_IR; + b[3] = 0; /* send ir request */ - ret = dvb_usb_generic_rw(d, state->data, 4, - state->data, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, b, 4, rx, PCTV_ANSWER_LEN, 0); if (ret != 0) goto ret; if (debug > 3) { - info("%s: read: %2d: %*ph: ", __func__, ret, 3, state->data); - for (i = 0; (i < state->data[3]) && ((i + 3) < PCTV_ANSWER_LEN); i++) - info(" %02x", state->data[i + 3]); + info("%s: read: %2d: %*ph: ", __func__, ret, 3, rx); + for (i = 0; (i < rx[3]) && ((i+3) < PCTV_ANSWER_LEN); i++) + info(" %02x", rx[i+3]); info("\n"); } - if ((state->data[3] == 9) && (state->data[12] & 0x01)) { + if ((rx[3] == 9) && (rx[12] & 0x01)) { /* got a "press" event */ - state->last_rc_key = RC_SCANCODE_RC5(state->data[7], state->data[6]); + state->last_rc_key = RC_SCANCODE_RC5(rx[7], rx[6]); if (debug > 2) info("%s: cmd=0x%02x sys=0x%02x\n", - __func__, state->data[6], state->data[7]); + __func__, rx[6], rx[7]); rc_keydown(d->rc_dev, RC_TYPE_RC5, state->last_rc_key, 0); } else if (state->last_rc_key) { @@ -595,7 +606,7 @@ static int pctv452e_rc_query(struct dvb_usb_device *d) state->last_rc_key = 0; } ret: - mutex_unlock(&state->ca_mutex); + kfree(b); return ret; } -- GitLab From d25f9bfeb62f812e8aa7eded53df8155bff25ee5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 9 Dec 2016 09:47:17 -0200 Subject: [PATCH 1284/1442] v4l: tvp5150: Reset device at probe time, not in get/set format handlers commit aff808e813fc2d311137754165cf53d4ee6ddcc2 upstream. The tvp5150 doesn't support format setting through the subdev pad API and thus implements the set format handler as a get format operation. The single handler, tvp5150_fill_fmt(), resets the device by calling tvp5150_reset(). This causes malfunction as the device can be reset at will, possibly from userspace when the subdev userspace API is enabled. The reset call was added in commit ec2c4f3f93cb ("[media] media: tvp5150: Add mbus_fmt callbacks"), probably as an attempt to set the device to a known state before detecting the current TV standard. However, the get format handler doesn't access the hardware to get the TV standard since commit 963ddc63e20d ("[media] media: tvp5150: Add cropping support"). There is thus no need to reset the device when getting the format. However, removing the tvp5150_reset() from the get/set format handlers results in the function not being called at all if the bridge driver doesn't use the .reset() operation. The operation is nowadays abused and shouldn't be used, so shouldn't expect bridge drivers to call it. To make sure the device is properly initialize, move the reset call from the format handlers to the probe function. Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 7268e706e216..7fa359ea3031 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -858,8 +858,6 @@ static int tvp5150_fill_fmt(struct v4l2_subdev *sd, f = &format->format; - tvp5150_reset(sd, 0); - f->width = decoder->rect.width; f->height = decoder->rect.height / 2; @@ -1521,7 +1519,6 @@ static int tvp5150_probe(struct i2c_client *c, res = core->hdl.error; goto err; } - v4l2_ctrl_handler_setup(&core->hdl); /* Default is no cropping */ core->rect.top = 0; @@ -1532,6 +1529,8 @@ static int tvp5150_probe(struct i2c_client *c, core->rect.left = 0; core->rect.width = TVP5150_H_MAX; + tvp5150_reset(sd, 0); /* Calls v4l2_ctrl_handler_setup() */ + res = v4l2_async_register_subdev(sd); if (res < 0) goto err; -- GitLab From 11e5015ae1d12473f12baa1e8cc66849be019c3a Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 9 Dec 2016 09:47:18 -0200 Subject: [PATCH 1285/1442] v4l: tvp5150: Fix comment regarding output pin muxing commit b4b2de386bbb6589d81596999d4a924928dc119b upstream. The FID/GLCO/VLK/HVLK and INTREQ/GPCL/VBLK pins are muxed differently depending on whether the input is an S-Video or composite signal. The comment that explains the logic doesn't reflect the code. It appears that the comment is incorrect, as disabling the output data bus in composite mode makes no sense. Update the comment to match the code. While at it define macros for the MISC_CTL register bits, the code is too confusing with numerical values. Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 24 +++++++++++++++++------- drivers/media/i2c/tvp5150_reg.h | 9 +++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 7fa359ea3031..e88d2d25b37e 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -288,8 +288,12 @@ static inline void tvp5150_selmux(struct v4l2_subdev *sd) tvp5150_write(sd, TVP5150_OP_MODE_CTL, opmode); tvp5150_write(sd, TVP5150_VD_IN_SRC_SEL_1, input); - /* Svideo should enable YCrCb output and disable GPCL output - * For Composite and TV, it should be the reverse + /* + * Setup the FID/GLCO/VLK/HVLK and INTREQ/GPCL/VBLK output signals. For + * S-Video we output the vertical lock (VLK) signal on FID/GLCO/VLK/HVLK + * and set INTREQ/GPCL/VBLK to logic 0. For composite we output the + * field indicator (FID) signal on FID/GLCO/VLK/HVLK and set + * INTREQ/GPCL/VBLK to logic 1. */ val = tvp5150_read(sd, TVP5150_MISC_CTL); if (val < 0) { @@ -298,9 +302,9 @@ static inline void tvp5150_selmux(struct v4l2_subdev *sd) } if (decoder->input == TVP5150_SVIDEO) - val = (val & ~0x40) | 0x10; + val = (val & ~TVP5150_MISC_CTL_GPCL) | TVP5150_MISC_CTL_HVLK; else - val = (val & ~0x10) | 0x40; + val = (val & ~TVP5150_MISC_CTL_HVLK) | TVP5150_MISC_CTL_GPCL; tvp5150_write(sd, TVP5150_MISC_CTL, val); }; @@ -452,7 +456,12 @@ static const struct i2c_reg_value tvp5150_init_enable[] = { },{ /* Automatic offset and AGC enabled */ TVP5150_ANAL_CHL_CTL, 0x15 },{ /* Activate YCrCb output 0x9 or 0xd ? */ - TVP5150_MISC_CTL, 0x6f + TVP5150_MISC_CTL, TVP5150_MISC_CTL_GPCL | + TVP5150_MISC_CTL_INTREQ_OE | + TVP5150_MISC_CTL_YCBCR_OE | + TVP5150_MISC_CTL_SYNC_OE | + TVP5150_MISC_CTL_VBLANK | + TVP5150_MISC_CTL_CLOCK_OE, },{ /* Activates video std autodetection for all standards */ TVP5150_AUTOSW_MSK, 0x0 },{ /* Default format: 0x47. For 4:2:2: 0x40 */ @@ -1047,11 +1056,12 @@ static int tvp5150_s_stream(struct v4l2_subdev *sd, int enable) { struct tvp5150 *decoder = to_tvp5150(sd); /* Output format: 8-bit ITU-R BT.656 with embedded syncs */ - int val = 0x09; + int val = TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_CLOCK_OE; /* Output format: 8-bit 4:2:2 YUV with discrete sync */ if (decoder->mbus_type == V4L2_MBUS_PARALLEL) - val = 0x0d; + val = TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_SYNC_OE + | TVP5150_MISC_CTL_CLOCK_OE; /* Initializes TVP5150 to its default values */ /* # set PCLK (27MHz) */ diff --git a/drivers/media/i2c/tvp5150_reg.h b/drivers/media/i2c/tvp5150_reg.h index 25a994944918..30a48c28d05a 100644 --- a/drivers/media/i2c/tvp5150_reg.h +++ b/drivers/media/i2c/tvp5150_reg.h @@ -9,6 +9,15 @@ #define TVP5150_ANAL_CHL_CTL 0x01 /* Analog channel controls */ #define TVP5150_OP_MODE_CTL 0x02 /* Operation mode controls */ #define TVP5150_MISC_CTL 0x03 /* Miscellaneous controls */ +#define TVP5150_MISC_CTL_VBLK_GPCL BIT(7) +#define TVP5150_MISC_CTL_GPCL BIT(6) +#define TVP5150_MISC_CTL_INTREQ_OE BIT(5) +#define TVP5150_MISC_CTL_HVLK BIT(4) +#define TVP5150_MISC_CTL_YCBCR_OE BIT(3) +#define TVP5150_MISC_CTL_SYNC_OE BIT(2) +#define TVP5150_MISC_CTL_VBLANK BIT(1) +#define TVP5150_MISC_CTL_CLOCK_OE BIT(0) + #define TVP5150_AUTOSW_MSK 0x04 /* Autoswitch mask: TVP5150A / TVP5150AM */ /* Reserved 05h */ -- GitLab From 1b8ff91ae545d36337de459c7195f6f369dc776d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 9 Dec 2016 09:47:19 -0200 Subject: [PATCH 1286/1442] v4l: tvp5150: Don't override output pinmuxing at stream on/off time commit 79d6205a3f741c9fb89cfc47dfa0eddb1526726d upstream. The s_stream() handler incorrectly writes the whole MISC_CTL register to enable or disable the outputs, overriding the output pinmuxing configuration. Fix it to only touch the output enable bits. The CONF_SHARED_PIN register is also written by the same function, resulting in muxing the INTREQ signal instead of the VBLK/GPCL signal on the INTREQ/GPCL/VBLK pin. As the driver doesn't support interrupts this is obviously incorrect, and breaks operation on other devices. Fix it by removing the write. Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/tvp5150.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index e88d2d25b37e..59aa4dafb60b 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -1055,22 +1055,27 @@ static const struct media_entity_operations tvp5150_sd_media_ops = { static int tvp5150_s_stream(struct v4l2_subdev *sd, int enable) { struct tvp5150 *decoder = to_tvp5150(sd); - /* Output format: 8-bit ITU-R BT.656 with embedded syncs */ - int val = TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_CLOCK_OE; - - /* Output format: 8-bit 4:2:2 YUV with discrete sync */ - if (decoder->mbus_type == V4L2_MBUS_PARALLEL) - val = TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_SYNC_OE - | TVP5150_MISC_CTL_CLOCK_OE; + int val; - /* Initializes TVP5150 to its default values */ - /* # set PCLK (27MHz) */ - tvp5150_write(sd, TVP5150_CONF_SHARED_PIN, 0x00); + /* Enable or disable the video output signals. */ + val = tvp5150_read(sd, TVP5150_MISC_CTL); + if (val < 0) + return val; + + val &= ~(TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_SYNC_OE | + TVP5150_MISC_CTL_CLOCK_OE); + + if (enable) { + /* + * Enable the YCbCr and clock outputs. In discrete sync mode + * (non-BT.656) additionally enable the the sync outputs. + */ + val |= TVP5150_MISC_CTL_YCBCR_OE | TVP5150_MISC_CTL_CLOCK_OE; + if (decoder->mbus_type == V4L2_MBUS_PARALLEL) + val |= TVP5150_MISC_CTL_SYNC_OE; + } - if (enable) - tvp5150_write(sd, TVP5150_MISC_CTL, val); - else - tvp5150_write(sd, TVP5150_MISC_CTL, 0x00); + tvp5150_write(sd, TVP5150_MISC_CTL, val); return 0; } -- GitLab From 9913aca2d5879a474f4ecb07c081ef1d0f7b3cf0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Jan 2017 15:59:40 +0000 Subject: [PATCH 1287/1442] drm/i915: Clear ret before unbinding in i915_gem_evict_something() commit e88893fea17996018b2d68a22e677ea04f3baadf upstream. Missed when rebasing patches, I failed to set ret to zero before starting the unbind loop (which depends upon ret being zero). Reported-by: Matthew Auld Fixes: 9332f3b1b99a ("drm/i915: Combine loops within i915_gem_evict_something") Signed-off-by: Chris Wilson Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170105155940.10033-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld (cherry picked from commit 121dfbb2a2ef1c5f49e15c38ccc47ff0beb59446) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_evict.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 5b6f81c1dbca..7467355e4a18 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -194,6 +194,7 @@ i915_gem_evict_something(struct i915_address_space *vm, } /* Unbinding will emit any required flushes */ + ret = 0; while (!list_empty(&eviction_list)) { vma = list_first_entry(&eviction_list, struct i915_vma, -- GitLab From 2cff678d0bb506595f7ca765c6e3b3ff0130f7a9 Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Wed, 18 Jan 2017 13:38:43 -0800 Subject: [PATCH 1288/1442] drm/i915: prevent crash with .disable_display parameter commit 27892bbdc9233f33bf0f44e08aab8f12e0dec142 upstream. The .disable_display parameter was causing a fatal crash when fbdev was dereferenced during driver init. V1: protection in i915_drv.c V2: Moved protection to intel_fbdev.c Fixes: 43cee314345a ("drm/i915/fbdev: Limit the global async-domain synchronization") Testcase: igt/drv_module_reload/basic-no-display Cc: Chris Wilson Signed-off-by: Clint Taylor Link: http://patchwork.freedesktop.org/patch/msgid/1484775523-29428-1-git-send-email-clinton.a.taylor@intel.com Reviewed-by: Chris Wilson Cc: Lukas Wunner Cc: Daniel Vetter Cc: Jani Nikula Signed-off-by: Chris Wilson (cherry picked from commit 5b8cd0755f8a06a851c436a013e7be0823fb155a) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_fbdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index b7098f98bb67..9127e57f383c 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -745,6 +745,9 @@ void intel_fbdev_initial_config_async(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + if (!ifbdev) + return; + ifbdev->cookie = async_schedule(intel_fbdev_initial_config, ifbdev); } -- GitLab From 8d7c76ae613ed9efd4548e85b894794a562dada4 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:42 +0200 Subject: [PATCH 1289/1442] drm/i915: Don't leak edid in intel_crt_detect_ddc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c34f078675f505c4437919bb1897b1351f16a050 upstream. In the path where intel_crt_detect_ddc() detects a CRT, if would return true without freeing the edid. Fixes: a2bd1f541f19 ("drm/i915: check whether we actually received an edid in detect_ddc") Cc: Chris Wilson Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-1-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit c96b63a6a7ac4bd670ec2e663793a9a31418b790) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_crt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index dfbcf16b41df..4149a0fbe8bd 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -499,6 +499,7 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev); struct edid *edid; struct i2c_adapter *i2c; + bool ret = false; BUG_ON(crt->base.type != INTEL_OUTPUT_ANALOG); @@ -515,17 +516,17 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) */ if (!is_digital) { DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n"); - return true; + ret = true; + } else { + DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } - - DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } else { DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [no valid EDID found]\n"); } kfree(edid); - return false; + return ret; } static enum drm_connector_status -- GitLab From ff4956555513d0ad0088866c748a30bf10a61019 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:43 +0200 Subject: [PATCH 1290/1442] drm/i915: Don't init hpd polling for vlv and chv from runtime_suspend() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 21d6e0bde50713922a6520ef84e5fd245b05d468 upstream. An error in the condition for avoiding the call to intel_hpd_poll_init() for valleyview and cherryview from intel_runtime_suspend() caused it to be called unconditionally. Fix it. Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Lyude Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-2-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 04313b00b79405f86d815100f85c47a2ee5b8ca0) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 18dfdd5c1b3b..670beebc32f6 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2372,7 +2372,7 @@ static int intel_runtime_suspend(struct device *kdev) assert_forcewakes_inactive(dev_priv); - if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) intel_hpd_poll_init(dev_priv); DRM_DEBUG_KMS("Device suspended\n"); -- GitLab From 181478cdb75eb4d9865064b15cb93418c72759f2 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:44 +0200 Subject: [PATCH 1291/1442] drm/i915: Fix calculation of rotated x and y offsets for planar formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3781bd6e7d64d5f5bea9fdee11ab9460a700c0e4 upstream. Parameters tile_size, tile_width and tile_height were passed in the wrong order to _intel_adjust_tile_offset() when calculating the rotated offsets. This doesn't fix any user visible bug, since for packed formats new and old offset are the same and the rotated offsets are within a tile before they are fed to _intel_adjust_tile_offset(). In that case, the offsets are unchanged. That is not true for planar formats, but those are currently not supported. Fixes: 66a2d927cb0e ("drm/i915: Make intel_adjust_tile_offset() work for linear buffers") Cc: Ville Syrjälä Cc: Sivakumar Thulasimani Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-3-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 46a1bd289507dfcc428fb9daf65421ed6be6af8b) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3c9cb35e46ab..944e067a9f28 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2587,8 +2587,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, * We only keep the x/y offsets, so push all of the * gtt offset into the x/y offsets. */ - _intel_adjust_tile_offset(&x, &y, tile_size, - tile_width, tile_height, pitch_tiles, + _intel_adjust_tile_offset(&x, &y, + tile_width, tile_height, + tile_size, pitch_tiles, gtt_offset_rotated * tile_size, 0); gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height; -- GitLab From d7c3d7e453b70e9e9e81d8409e5f8c004597ec05 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:45 +0200 Subject: [PATCH 1292/1442] drm/i915: Check for NULL atomic state in intel_crtc_disable_noatomic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6d1d427a4e24c403b4adf928d61994bdaa0ca03a upstream. In intel_crtc_disable_noatomic(), bail on a failure to allocate an atomic state to avoid a NULL pointer dereference. Fixes: 4a80655827af ("drm/i915: Pass atomic state to crtc enable/disable functions") Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-4-git-send-email-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 31bb2ef97ea9db343348f9b5ccaa9bb6f48fc655) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 944e067a9f28..8079e5b380cb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6869,6 +6869,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) } state = drm_atomic_state_alloc(crtc->dev); + if (!state) { + DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory", + crtc->base.id, crtc->name); + return; + } + state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; /* Everything's already locked, -EDEADLK can't happen. */ -- GitLab From c81ee4ed9c0d49514254487eb037548dd5e25ef8 Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Thu, 5 Jan 2017 15:00:05 +0800 Subject: [PATCH 1293/1442] IB/umem: Release pid in error and ODP flow commit 828f6fa65ce7e80f77f5ab12942e44eb3d9d174e upstream. 1. Release pid before enter odp flow 2. Release pid when fail to allocate memory Fixes: 87773dd56d54 ("IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get") Fixes: 8ada2c1c0c1d ("IB/core: Add support for on demand paging regions") Signed-off-by: Kenneth Lee Reviewed-by: Haggai Eran Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/umem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 84b4eff90395..c22fde6207d1 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -134,6 +134,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); if (access & IB_ACCESS_ON_DEMAND) { + put_pid(umem->pid); ret = ib_umem_odp_get(context, umem); if (ret) { kfree(umem); @@ -149,6 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { + put_pid(umem->pid); kfree(umem); return ERR_PTR(-ENOMEM); } -- GitLab From fed1e89a16e3684e1bd218e1d6a78f83b5ceb865 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Jan 2017 15:25:58 +0200 Subject: [PATCH 1294/1442] IB/rxe: Fix rxe dev insertion to rxe_dev_list commit f39f775218a7520e3700de2003c84a042c3b5972 upstream. The first argument of list_add_tail is the new item and the second is the head of the list. Fix the code to pass arguments in the right order, otherwise not all the rxe devices will be removed during teardown. Fixes: 8700e3e7c4857 ('Soft RoCE driver') Signed-off-by: Maor Gottlieb Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index ffff5a54cb34..f4f3942ebbd1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -554,7 +554,7 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev) } spin_lock_bh(&dev_list_lock); - list_add_tail(&rxe_dev_list, &rxe->list); + list_add_tail(&rxe->list, &rxe_dev_list); spin_unlock_bh(&dev_list_lock); return rxe; } -- GitLab From 9a335996bfcfce1fd7fe7549fb1a679489774b99 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Thu, 19 Jan 2017 15:25:59 +0200 Subject: [PATCH 1295/1442] IB/rxe: Prevent from completer to operate on non valid QP commit 2d4b21e0a2913612274a69a3ba1bfee4cffc6e77 upstream. On UD QP completer tasklet is scheduled for each packet sent. If it is followed by a destroy_qp(), the kernel panic will happen as the completer tries to operate on a destroyed QP. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/rxe/rxe_qp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 486d576e55bc..44b2108253bd 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -813,8 +813,7 @@ void rxe_qp_destroy(struct rxe_qp *qp) del_timer_sync(&qp->rnr_nak_timer); rxe_cleanup_task(&qp->req.task); - if (qp_type(qp) == IB_QPT_RC) - rxe_cleanup_task(&qp->comp.task); + rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ __rxe_do_task(&qp->req.task); -- GitLab From 668a827a6c9daa88efb4464144a6bd7b2146c66c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Dec 2016 09:41:29 -0200 Subject: [PATCH 1296/1442] s5k4ecgx: select CRC32 helper commit c739c0a7c3c2472d7562b8f802cdce44d2597c8b upstream. A rare randconfig build failure shows up in this driver when the CRC32 helper is not there: drivers/media/built-in.o: In function `s5k4ecgx_s_power': s5k4ecgx.c:(.text+0x9eb4): undefined reference to `crc32_le' This adds the 'select' that all other users of this function have. Fixes: 8b99312b7214 ("[media] Add v4l2 subdev driver for S5K4ECGX sensor") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index 2669b4bad910..5a27bffa02fb 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -655,6 +655,7 @@ config VIDEO_S5K6A3 config VIDEO_S5K4ECGX tristate "Samsung S5K4ECGX sensor support" depends on I2C && VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API + select CRC32 ---help--- This is a V4L2 sensor-level driver for Samsung S5K4ECGX 5M camera sensor with an embedded SoC image signal processor. -- GitLab From 35948ae694c0d6aceda6d41ca1b624206f702da1 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 10 Jan 2017 17:31:56 +0300 Subject: [PATCH 1297/1442] pinctrl: broxton: Use correct PADCFGLOCK offset commit ecc8995363ee6231b32dad61c955b371b79cc4cf upstream. PADCFGLOCK (and PADCFGLOCK_TX) offset in Broxton actually starts at 0x060 and not 0x090 as used in the driver. Fix it to use the correct offset. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-broxton.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-broxton.c b/drivers/pinctrl/intel/pinctrl-broxton.c index 59cb7a6fc5be..901b356b09d7 100644 --- a/drivers/pinctrl/intel/pinctrl-broxton.c +++ b/drivers/pinctrl/intel/pinctrl-broxton.c @@ -19,7 +19,7 @@ #define BXT_PAD_OWN 0x020 #define BXT_HOSTSW_OWN 0x080 -#define BXT_PADCFGLOCK 0x090 +#define BXT_PADCFGLOCK 0x060 #define BXT_GPI_IE 0x110 #define BXT_COMMUNITY(s, e) \ -- GitLab From 583eded5860b84344d91462796cc5e5d40bbc27b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 17 Jan 2017 19:52:54 +0900 Subject: [PATCH 1298/1442] pinctrl: uniphier: fix Ethernet (RMII) pin-mux setting for LD20 commit df1539c25cce98e2ac69881958850c6535240707 upstream. Fix the pin-mux values for the MDC, MDIO, MDIO_INTL, PHYRSTL pins. Fixes: 1e359ab1285e ("pinctrl: uniphier: add Ethernet pin-mux settings") Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c index aa8bd9794683..96686336e3a3 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c @@ -561,7 +561,7 @@ static const int ether_rgmii_muxvals[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static const unsigned ether_rmii_pins[] = {30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 42, 45}; -static const int ether_rmii_muxvals[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +static const int ether_rmii_muxvals[] = {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}; static const unsigned i2c0_pins[] = {63, 64}; static const int i2c0_muxvals[] = {0, 0}; static const unsigned i2c1_pins[] = {65, 66}; -- GitLab From da1fdb8456ac79533aa09ce61b99a84ac7ca57b9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Jan 2017 16:38:52 +0200 Subject: [PATCH 1299/1442] pinctrl: baytrail: Rectify debounce support commit 04ff5a095d662e0879f0eb04b9247e092210aeff upstream. The commit 658b476c742f ("pinctrl: baytrail: Add debounce configuration") implements debounce for Baytrail pin control, but seems wasn't tested properly. The register which keeps debounce value is separated from the configuration one. Writing wrong values to the latter will guarantee wrong behaviour of the driver and even might break something physically. Besides above there is missed case how to disable it, which is actually done through the bit in configuration register. Rectify implementation here by using proper register for debounce value. Fixes: 658b476c742f ("pinctrl: baytrail: Add debounce configuration") Cc: Cristina Ciocan Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-baytrail.c | 28 ++++++++++++++---------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 71bbeb9321ba..079015385fd8 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1092,6 +1092,7 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, enum pin_config_param param = pinconf_to_config_param(*config); void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG); void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); + void __iomem *db_reg = byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG); unsigned long flags; u32 conf, pull, val, debounce; u16 arg = 0; @@ -1128,7 +1129,7 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, return -EINVAL; raw_spin_lock_irqsave(&vg->lock, flags); - debounce = readl(byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG)); + debounce = readl(db_reg); raw_spin_unlock_irqrestore(&vg->lock, flags); switch (debounce & BYT_DEBOUNCE_PULSE_MASK) { @@ -1176,6 +1177,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, unsigned int param, arg; void __iomem *conf_reg = byt_gpio_reg(vg, offset, BYT_CONF0_REG); void __iomem *val_reg = byt_gpio_reg(vg, offset, BYT_VAL_REG); + void __iomem *db_reg = byt_gpio_reg(vg, offset, BYT_DEBOUNCE_REG); unsigned long flags; u32 conf, val, debounce; int i, ret = 0; @@ -1238,36 +1240,40 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, break; case PIN_CONFIG_INPUT_DEBOUNCE: - debounce = readl(byt_gpio_reg(vg, offset, - BYT_DEBOUNCE_REG)); - conf &= ~BYT_DEBOUNCE_PULSE_MASK; + debounce = readl(db_reg); + debounce &= ~BYT_DEBOUNCE_PULSE_MASK; switch (arg) { + case 0: + conf &= BYT_DEBOUNCE_EN; + break; case 375: - conf |= BYT_DEBOUNCE_PULSE_375US; + debounce |= BYT_DEBOUNCE_PULSE_375US; break; case 750: - conf |= BYT_DEBOUNCE_PULSE_750US; + debounce |= BYT_DEBOUNCE_PULSE_750US; break; case 1500: - conf |= BYT_DEBOUNCE_PULSE_1500US; + debounce |= BYT_DEBOUNCE_PULSE_1500US; break; case 3000: - conf |= BYT_DEBOUNCE_PULSE_3MS; + debounce |= BYT_DEBOUNCE_PULSE_3MS; break; case 6000: - conf |= BYT_DEBOUNCE_PULSE_6MS; + debounce |= BYT_DEBOUNCE_PULSE_6MS; break; case 12000: - conf |= BYT_DEBOUNCE_PULSE_12MS; + debounce |= BYT_DEBOUNCE_PULSE_12MS; break; case 24000: - conf |= BYT_DEBOUNCE_PULSE_24MS; + debounce |= BYT_DEBOUNCE_PULSE_24MS; break; default: ret = -EINVAL; } + if (!ret) + writel(debounce, db_reg); break; default: ret = -ENOTSUPP; -- GitLab From 143a9ad4e68cc5c210e6e99e910d6b77cc8a9ec5 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 24 Jan 2017 15:17:45 -0800 Subject: [PATCH 1300/1442] memory_hotplug: make zone_can_shift() return a boolean value commit 8a1f780e7f28c7c1d640118242cf68d528c456cd upstream. online_{kernel|movable} is used to change the memory zone to ZONE_{NORMAL|MOVABLE} and online the memory. To check that memory zone can be changed, zone_can_shift() is used. Currently the function returns minus integer value, plus integer value and 0. When the function returns minus or plus integer value, it means that the memory zone can be changed to ZONE_{NORNAL|MOVABLE}. But when the function returns 0, there are two meanings. One of the meanings is that the memory zone does not need to be changed. For example, when memory is in ZONE_NORMAL and onlined by online_kernel the memory zone does not need to be changed. Another meaning is that the memory zone cannot be changed. When memory is in ZONE_NORMAL and onlined by online_movable, the memory zone may not be changed to ZONE_MOVALBE due to memory online limitation(see Documentation/memory-hotplug.txt). In this case, memory must not be onlined. The patch changes the return type of zone_can_shift() so that memory online operation fails when memory zone cannot be changed as follows: Before applying patch: # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 # echo online_movable > memory4097/state # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 8388608 managed 8388608 online_movable operation succeeded. But memory is onlined as ZONE_NORMAL, not ZONE_MOVABLE. After applying patch: # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 # echo online_movable > memory4097/state bash: echo: write error: Invalid argument # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 online_movable operation failed because of failure of changing the memory zone from ZONE_NORMAL to ZONE_MOVABLE Fixes: df429ac03936 ("memory-hotplug: more general validation of zone during online") Link: http://lkml.kernel.org/r/2f9c3837-33d7-b6e5-59c0-6ca4372b2d84@gmail.com Signed-off-by: Yasuaki Ishimatsu Reviewed-by: Reza Arbab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 4 ++-- include/linux/memory_hotplug.h | 4 ++-- mm/memory_hotplug.c | 28 +++++++++++++++++----------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 62c63c0c5c22..e7f86a8887d2 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -410,14 +410,14 @@ static ssize_t show_valid_zones(struct device *dev, sprintf(buf, "%s", zone->name); /* MMOP_ONLINE_KERNEL */ - zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL); + zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); } /* MMOP_ONLINE_MOVABLE */ - zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE); + zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 01033fadea47..c1784c0b4f35 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); -extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages, - enum zone_type target); +extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target, int *zone_shift); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index cad4b9125695..c3a8141ac788 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1033,36 +1033,39 @@ static void node_states_set_node(int node, struct memory_notify *arg) node_set_state(node, N_MEMORY); } -int zone_can_shift(unsigned long pfn, unsigned long nr_pages, - enum zone_type target) +bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target, int *zone_shift) { struct zone *zone = page_zone(pfn_to_page(pfn)); enum zone_type idx = zone_idx(zone); int i; + *zone_shift = 0; + if (idx < target) { /* pages must be at end of current zone */ if (pfn + nr_pages != zone_end_pfn(zone)) - return 0; + return false; /* no zones in use between current zone and target */ for (i = idx + 1; i < target; i++) if (zone_is_initialized(zone - idx + i)) - return 0; + return false; } if (target < idx) { /* pages must be at beginning of current zone */ if (pfn != zone->zone_start_pfn) - return 0; + return false; /* no zones in use between current zone and target */ for (i = target + 1; i < idx; i++) if (zone_is_initialized(zone - idx + i)) - return 0; + return false; } - return target - idx; + *zone_shift = target - idx; + return true; } /* Must be protected by mem_hotplug_begin() */ @@ -1089,10 +1092,13 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ !can_online_high_movable(zone)) return -EINVAL; - if (online_type == MMOP_ONLINE_KERNEL) - zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL); - else if (online_type == MMOP_ONLINE_MOVABLE) - zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE); + if (online_type == MMOP_ONLINE_KERNEL) { + if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift)) + return -EINVAL; + } else if (online_type == MMOP_ONLINE_MOVABLE) { + if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift)) + return -EINVAL; + } zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages); if (!zone) -- GitLab From 776050a9b55e17b72b5684794b9580d72e920e17 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 10 Jan 2017 17:51:17 +0000 Subject: [PATCH 1301/1442] virtio_mmio: Set DMA masks appropriately commit f7f6634d23830ff74335734fbdb28ea109c1f349 upstream. Once DMA API usage is enabled, it becomes apparent that virtio-mmio is inadvertently relying on the default 32-bit DMA mask, which leads to problems like rapidly exhausting SWIOTLB bounce buffers. Ensure that we set the appropriate 64-bit DMA mask whenever possible, with the coherent mask suitably limited for the legacy vring as per a0be1db4304f ("virtio_pci: Limit DMA mask to 44 bits for legacy virtio devices"). Cc: Andy Lutomirski Cc: Michael S. Tsirkin Reported-by: Jean-Philippe Brucker Fixes: b42111382f0e ("virtio_mmio: Use the DMA API if enabled") Signed-off-by: Robin Murphy Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_mmio.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 48bfea91dbca..50840984fbfa 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -59,6 +59,7 @@ #define pr_fmt(fmt) "virtio-mmio: " fmt #include +#include #include #include #include @@ -497,6 +498,7 @@ static int virtio_mmio_probe(struct platform_device *pdev) struct virtio_mmio_device *vm_dev; struct resource *mem; unsigned long magic; + int rc; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!mem) @@ -545,9 +547,25 @@ static int virtio_mmio_probe(struct platform_device *pdev) } vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID); - if (vm_dev->version == 1) + if (vm_dev->version == 1) { writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + /* + * In the legacy case, ensure our coherently-allocated virtio + * ring will be at an address expressable as a 32-bit PFN. + */ + if (!rc) + dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32 + PAGE_SHIFT)); + } else { + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + } + if (rc) + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) + dev_warn(&pdev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n"); + platform_set_drvdata(pdev, vm_dev); return register_virtio_device(&vm_dev->vdev); -- GitLab From adb260d15134f3aabd6e1bf39ee7f4ea28385c2d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 7 Jan 2017 09:33:34 +0300 Subject: [PATCH 1302/1442] platform/x86: mlx-platform: free first dev on error commit 63d762b88cb5510f2bfdb5112ced18cde867ae61 upstream. There is an off-by-one error so we don't unregister priv->pdev_mux[0]. Also it's slightly simpler as a while loop instead of a for loop. Fixes: 58cbbee2391c ("x86/platform/mellanox: Introduce support for Mellanox systems platform") Signed-off-by: Dan Carpenter Acked-by: Vadim Pasternak Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/mellanox/mlx-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/mellanox/mlx-platform.c b/arch/x86/platform/mellanox/mlx-platform.c index 7dcfcca97399..c0355d789fce 100644 --- a/arch/x86/platform/mellanox/mlx-platform.c +++ b/arch/x86/platform/mellanox/mlx-platform.c @@ -233,7 +233,7 @@ static int __init mlxplat_init(void) return 0; fail_platform_mux_register: - for (i--; i > 0 ; i--) + while (--i >= 0) platform_device_unregister(priv->pdev_mux[i]); platform_device_unregister(priv->pdev_i2c); fail_alloc: -- GitLab From 98185d4b18a14dad0b95efd85225960b5435727d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Jan 2017 18:39:40 +0200 Subject: [PATCH 1303/1442] platform/x86: intel_mid_powerbtn: Set IRQ_ONESHOT commit 5a00b6c2438460b870a451f14593fc40d3c7edf6 upstream. The commit 1c6c69525b40 ("genirq: Reject bogus threaded irq requests") starts refusing misconfigured interrupt handlers. This makes intel_mid_powerbtn not working anymore. Add a mandatory flag to a threaded IRQ request in the driver. Fixes: 1c6c69525b40 ("genirq: Reject bogus threaded irq requests") Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel_mid_powerbtn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 1fc0de870ff8..361770568ad0 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -77,7 +77,7 @@ static int mfld_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - error = request_threaded_irq(irq, NULL, mfld_pb_isr, 0, + error = request_threaded_irq(irq, NULL, mfld_pb_isr, IRQF_ONESHOT, DRIVER_NAME, input); if (error) { dev_err(&pdev->dev, "Unable to request irq %d for mfld power" -- GitLab From f5f415c13209c5a7a71da85462815288321483df Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 24 Jan 2017 15:18:10 -0800 Subject: [PATCH 1304/1442] mm, memcg: do not retry precharge charges commit 3674534b775354516e5c148ea48f51d4d1909a78 upstream. When memory.move_charge_at_immigrate is enabled and precharges are depleted during move, mem_cgroup_move_charge_pte_range() will attempt to increase the size of the precharge. Prevent precharges from ever looping by setting __GFP_NORETRY. This was probably the intention of the GFP_KERNEL & ~__GFP_NORETRY, which is pointless as written. Fixes: 0029e19ebf84 ("mm: memcontrol: remove explicit OOM parameter in charge path") Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1701130208510.69402@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d536a9daa511..4c6ade54d833 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4360,9 +4360,9 @@ static int mem_cgroup_do_precharge(unsigned long count) return ret; } - /* Try charges one by one with reclaim */ + /* Try charges one by one with reclaim, but do not retry */ while (count--) { - ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1); + ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1); if (ret) return ret; mc.precharge++; -- GitLab From 922813f4d66fb317e8602d058d03a1619af1ffd0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Jan 2017 21:09:50 +0100 Subject: [PATCH 1305/1442] perf/core: Fix concurrent sys_perf_event_open() vs. 'move_group' race commit 321027c1fe77f892f4ea07846aeae08cefbbb290 upstream. Di Shen reported a race between two concurrent sys_perf_event_open() calls where both try and move the same pre-existing software group into a hardware context. The problem is exactly that described in commit: f63a8daa5812 ("perf: Fix event->ctx locking") ... where, while we wait for a ctx->mutex acquisition, the event->ctx relation can have changed under us. That very same commit failed to recognise sys_perf_event_context() as an external access vector to the events and thereby didn't apply the established locking rules correctly. So while one sys_perf_event_open() call is stuck waiting on mutex_lock_double(), the other (which owns said locks) moves the group about. So by the time the former sys_perf_event_open() acquires the locks, the context we've acquired is stale (and possibly dead). Apply the established locking rules as per perf_event_ctx_lock_nested() to the mutex_lock_double() for the 'move_group' case. This obviously means we need to validate state after we acquire the locks. Reported-by: Di Shen (Keen Lab) Tested-by: John Dias Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kees Cook Cc: Linus Torvalds Cc: Min Chong Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: f63a8daa5812 ("perf: Fix event->ctx locking") Link: http://lkml.kernel.org/r/20170106131444.GZ3174@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 58 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 02c8421f8c01..e5a8839e7076 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9503,6 +9503,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) return 0; } +/* + * Variation on perf_event_ctx_lock_nested(), except we take two context + * mutexes. + */ +static struct perf_event_context * +__perf_event_ctx_lock_double(struct perf_event *group_leader, + struct perf_event_context *ctx) +{ + struct perf_event_context *gctx; + +again: + rcu_read_lock(); + gctx = READ_ONCE(group_leader->ctx); + if (!atomic_inc_not_zero(&gctx->refcount)) { + rcu_read_unlock(); + goto again; + } + rcu_read_unlock(); + + mutex_lock_double(&gctx->mutex, &ctx->mutex); + + if (group_leader->ctx != gctx) { + mutex_unlock(&ctx->mutex); + mutex_unlock(&gctx->mutex); + put_ctx(gctx); + goto again; + } + + return gctx; +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -9746,12 +9777,31 @@ SYSCALL_DEFINE5(perf_event_open, } if (move_group) { - gctx = group_leader->ctx; - mutex_lock_double(&gctx->mutex, &ctx->mutex); + gctx = __perf_event_ctx_lock_double(group_leader, ctx); + if (gctx->task == TASK_TOMBSTONE) { err = -ESRCH; goto err_locked; } + + /* + * Check if we raced against another sys_perf_event_open() call + * moving the software group underneath us. + */ + if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) { + /* + * If someone moved the group out from under us, check + * if this new event wound up on the same ctx, if so + * its the regular !move_group case, otherwise fail. + */ + if (gctx != ctx) { + err = -EINVAL; + goto err_locked; + } else { + perf_event_ctx_unlock(group_leader, gctx); + move_group = 0; + } + } } else { mutex_lock(&ctx->mutex); } @@ -9853,7 +9903,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_unpin_context(ctx); if (move_group) - mutex_unlock(&gctx->mutex); + perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); if (task) { @@ -9879,7 +9929,7 @@ SYSCALL_DEFINE5(perf_event_open, err_locked: if (move_group) - mutex_unlock(&gctx->mutex); + perf_event_ctx_unlock(group_leader, gctx); mutex_unlock(&ctx->mutex); /* err_file: */ fput(event_file); -- GitLab From b59dd202f231431c256b56de129e979d9563612f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 12 Jan 2017 12:44:54 +0200 Subject: [PATCH 1306/1442] drm/i915: Remove WaDisableLSQCROPERFforOCL KBL workaround. commit 4fc020d864647ea3ae8cb8f17d63e48e87ebd0bf upstream. The WaDisableLSQCROPERFforOCL workaround has the side effect of disabling an L3SQ optimization that has huge performance implications and is unlikely to be necessary for the correct functioning of usual graphic workloads. Userspace is free to re-enable the workaround on demand, and is generally in a better position to determine whether the workaround is necessary than the DRM is (e.g. only during the execution of compute kernels that rely on both L3 fences and HDC R/W requests). The same workaround seems to apply to BDW (at least to production stepping G1) and SKL as well (the internal workaround database claims that it does for all steppings, while the BSpec workaround table only mentions pre-production steppings), but the DRM doesn't do anything beyond whitelisting the L3SQCREG4 register so userspace can enable it when it sees fit. Do the same on KBL platforms. Improves performance of the GFXBench4 gl_manhattan31 benchmark by 60%, and gl_4 (AKA car chase) by 14% on a KBL GT2 running Mesa master -- This is followed by a regression of 35% and 10% respectively for the same benchmarks and platform caused by my recent patch series switching userspace to use the dataport constant cache instead of the sampler to implement uniform pull constant loads, which caused us to hit more heavily the L3 cache (and on platforms other than KBL had the opposite effect of improving performance of the same two benchmarks). The overall effect on KBL of this change combined with the recent userspace change is respectively 4.6% and 2.6%. SynMark2 OglShMapPcf was affected by the constant cache changes (though it improved as it did on other platforms rather than regressing), but is not significantly affected by this patch (with statistical significance of 5% and sample size 20). v2: Drop some more code to avoid unused variable warning. Fixes: 738fa1b3123f ("drm/i915/kbl: Add WaDisableLSQCROPERFforOCL") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99256 Signed-off-by: Francisco Jerez Cc: Matthew Auld Cc: Eero Tamminen Cc: Jani Nikula Cc: Mika Kuoppala Cc: beignet@lists.freedesktop.org Reviewed-by: Mika Kuoppala [Removed double Fixes tag] Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1484217894-20505-1-git-send-email-mika.kuoppala@intel.com (cherry picked from commit 8726f2faa371514fba2f594d799db95203dfeee0) Signed-off-by: Jani Nikula [ Francisco Jerez: Rebase on v4.9 branch. ] Signed-off-by: Francisco Jerez Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 3 +-- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 -------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0adb879833ff..67db1577ee49 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -858,8 +858,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, * this batch updates GEN8_L3SQCREG4 with default value we need to * set this bit here to retain the WA during flush. */ - if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) || - IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) + if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0)) l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS; wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ed9955dce156..8babfe0ce4e3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1153,14 +1153,6 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FENCE_DEST_SLM_DISABLE); - /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes - * involving this register should also be added to WA batch as required. - */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0)) - /* WaDisableLSQCROPERFforOCL:kbl */ - I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_RO_PERF_DIS); - /* WaToEnableHwFixForPushConstHWBug:kbl */ if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, -- GitLab From fd2ffe57dda03cb070204f53864ecdfd002aa2e3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Feb 2017 08:33:31 +0100 Subject: [PATCH 1307/1442] Linux 4.9.7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef95231d1625..da704d903321 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 6 +SUBLEVEL = 7 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 44cbb187eb691295060ed1199aec027652c08f88 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Wed, 1 Feb 2017 12:53:45 +0530 Subject: [PATCH 1308/1442] ANDROID: binder: fix format specifier for type binder_size_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following warning on 32bit ARCH build: CC drivers/android/binder.o drivers/android/binder.c: In function ‘binder_transaction’: ./include/linux/kern_levels.h:4:18: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 4 has type ‘binder_size_t {aka unsigned int}’ [-Wformat=] drivers/android/binder.c:2047:3: note: in expansion of macro ‘binder_user_error’ binder_user_error("%d:%d got transaction with unaligned buffers size, %lld\n", ^ Fixes: Change-Id: I02417f28cff14688f2e1d6fcb959438fd96566cc (ANDROID: binder: support for scatter-gather.") Signed-off-by: Amit Pundir --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6d3f4eeadd28..1bd840198c28 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2046,7 +2046,7 @@ static void binder_transaction(struct binder_proc *proc, if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) { binder_user_error("%d:%d got transaction with unaligned buffers size, %lld\n", proc->pid, thread->pid, - extra_buffers_size); + (u64)extra_buffers_size); return_error = BR_FAILED_REPLY; goto err_bad_offset; } -- GitLab From 61100bdefbab9fdad84d8bc6f04cf2f18cd4892d Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 14 Nov 2014 17:16:41 +0000 Subject: [PATCH 1309/1442] ANDROID: arm: topology: Define TC2 energy and provide it to the scheduler This patch is only here to be able to test provisioning of energy related data from an arch topology shim layer to the scheduler. Since there is no code today which deals with extracting energy related data from the dtb or acpi, and process it in the topology shim layer, the content of the sched_group_energy structures as well as the idle_state and capacity_state arrays are hard-coded here. This patch defines the sched_group_energy structure as well as the idle_state and capacity_state array for the cluster (relates to sched groups (sgs) in DIE sched domain level) and for the core (relates to sgs in MC sd level) for a Cortex A7 as well as for a Cortex A15. It further provides related implementations of the sched_domain_energy_f functions (cpu_cluster_energy() and cpu_core_energy()). To be able to propagate this information from the topology shim layer to the scheduler, the elements of the arm_topology[] table have been provisioned with the appropriate sched_domain_energy_f functions. Change-Id: I8c014bbd04f6a1d57892be9bfa16affe07948dcf cc: Russell King Signed-off-by: Dietmar Eggemann [AmitP: cherry-picked from common/android-4.4] Signed-off-by: Amit Pundir --- arch/arm/kernel/topology.c | 126 ++++++++++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 083f90fd009f..607b2354adb5 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -287,6 +287,127 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].socket_id, mpidr); } +/* + * ARM TC2 specific energy cost model data. There are no unit requirements for + * the data. Data can be normalized to any reference point, but the + * normalization must be consistent. That is, one bogo-joule/watt must be the + * same quantity for all data, but we don't care what it is. + */ +static struct idle_state idle_states_cluster_a7[] = { + { .power = 25 }, /* arch_cpu_idle() (active idle) = WFI */ + { .power = 25 }, /* WFI */ + { .power = 10 }, /* cluster-sleep-l */ + }; + +static struct idle_state idle_states_cluster_a15[] = { + { .power = 70 }, /* arch_cpu_idle() (active idle) = WFI */ + { .power = 70 }, /* WFI */ + { .power = 25 }, /* cluster-sleep-b */ + }; + +static struct capacity_state cap_states_cluster_a7[] = { + /* Cluster only power */ + { .cap = 150, .power = 2967, }, /* 350 MHz */ + { .cap = 172, .power = 2792, }, /* 400 MHz */ + { .cap = 215, .power = 2810, }, /* 500 MHz */ + { .cap = 258, .power = 2815, }, /* 600 MHz */ + { .cap = 301, .power = 2919, }, /* 700 MHz */ + { .cap = 344, .power = 2847, }, /* 800 MHz */ + { .cap = 387, .power = 3917, }, /* 900 MHz */ + { .cap = 430, .power = 4905, }, /* 1000 MHz */ + }; + +static struct capacity_state cap_states_cluster_a15[] = { + /* Cluster only power */ + { .cap = 426, .power = 7920, }, /* 500 MHz */ + { .cap = 512, .power = 8165, }, /* 600 MHz */ + { .cap = 597, .power = 8172, }, /* 700 MHz */ + { .cap = 682, .power = 8195, }, /* 800 MHz */ + { .cap = 768, .power = 8265, }, /* 900 MHz */ + { .cap = 853, .power = 8446, }, /* 1000 MHz */ + { .cap = 938, .power = 11426, }, /* 1100 MHz */ + { .cap = 1024, .power = 15200, }, /* 1200 MHz */ + }; + +static struct sched_group_energy energy_cluster_a7 = { + .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a7), + .idle_states = idle_states_cluster_a7, + .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a7), + .cap_states = cap_states_cluster_a7, +}; + +static struct sched_group_energy energy_cluster_a15 = { + .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a15), + .idle_states = idle_states_cluster_a15, + .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a15), + .cap_states = cap_states_cluster_a15, +}; + +static struct idle_state idle_states_core_a7[] = { + { .power = 0 }, /* arch_cpu_idle (active idle) = WFI */ + { .power = 0 }, /* WFI */ + { .power = 0 }, /* cluster-sleep-l */ + }; + +static struct idle_state idle_states_core_a15[] = { + { .power = 0 }, /* arch_cpu_idle (active idle) = WFI */ + { .power = 0 }, /* WFI */ + { .power = 0 }, /* cluster-sleep-b */ + }; + +static struct capacity_state cap_states_core_a7[] = { + /* Power per cpu */ + { .cap = 150, .power = 187, }, /* 350 MHz */ + { .cap = 172, .power = 275, }, /* 400 MHz */ + { .cap = 215, .power = 334, }, /* 500 MHz */ + { .cap = 258, .power = 407, }, /* 600 MHz */ + { .cap = 301, .power = 447, }, /* 700 MHz */ + { .cap = 344, .power = 549, }, /* 800 MHz */ + { .cap = 387, .power = 761, }, /* 900 MHz */ + { .cap = 430, .power = 1024, }, /* 1000 MHz */ + }; + +static struct capacity_state cap_states_core_a15[] = { + /* Power per cpu */ + { .cap = 426, .power = 2021, }, /* 500 MHz */ + { .cap = 512, .power = 2312, }, /* 600 MHz */ + { .cap = 597, .power = 2756, }, /* 700 MHz */ + { .cap = 682, .power = 3125, }, /* 800 MHz */ + { .cap = 768, .power = 3524, }, /* 900 MHz */ + { .cap = 853, .power = 3846, }, /* 1000 MHz */ + { .cap = 938, .power = 5177, }, /* 1100 MHz */ + { .cap = 1024, .power = 6997, }, /* 1200 MHz */ + }; + +static struct sched_group_energy energy_core_a7 = { + .nr_idle_states = ARRAY_SIZE(idle_states_core_a7), + .idle_states = idle_states_core_a7, + .nr_cap_states = ARRAY_SIZE(cap_states_core_a7), + .cap_states = cap_states_core_a7, +}; + +static struct sched_group_energy energy_core_a15 = { + .nr_idle_states = ARRAY_SIZE(idle_states_core_a15), + .idle_states = idle_states_core_a15, + .nr_cap_states = ARRAY_SIZE(cap_states_core_a15), + .cap_states = cap_states_core_a15, +}; + +/* sd energy functions */ +static inline +const struct sched_group_energy * const cpu_cluster_energy(int cpu) +{ + return cpu_topology[cpu].socket_id ? &energy_cluster_a7 : + &energy_cluster_a15; +} + +static inline +const struct sched_group_energy * const cpu_core_energy(int cpu) +{ + return cpu_topology[cpu].socket_id ? &energy_core_a7 : + &energy_core_a15; +} + static inline int cpu_corepower_flags(void) { return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \ @@ -295,10 +416,9 @@ static inline int cpu_corepower_flags(void) static struct sched_domain_topology_level arm_topology[] = { #ifdef CONFIG_SCHED_MC - { cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) }, - { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + { cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) }, #endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) }, { NULL, }, }; -- GitLab From 8aab20cfdc6dcd4a70d1a29d353597faf7bef9ed Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 29 Aug 2016 19:48:17 +0530 Subject: [PATCH 1310/1442] ANDROID: DEBUG: cpufreq: fix cpu_capacity tracing build for non-smp systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cpu curr capacity can only be traced for SMP systems. Non-SMP builds will fail with: drivers/cpufreq/cpufreq.c: In function ‘cpufreq_freq_transition_begin’: drivers/cpufreq/cpufreq.c:438:22: error: implicit declaration of function ‘capacity_curr_of’ [-Werror=implicit-function-declaration] trace_cpu_capacity(capacity_curr_of(cpu), cpu); ^ Change-Id: I48f870a5691d1afee5b60c3df3024cd8fb439516 Fixes: ("DEBUG: sched,cpufreq: add cpu_capacity change tracepoint") Signed-off-by: Amit Pundir Signed-off-by: John Stultz --- drivers/cpufreq/cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6a3d07869a5b..019e81797418 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -29,7 +29,9 @@ #include #include #include +#ifdef CONFIG_SMP #include +#endif #include static LIST_HEAD(cpufreq_policy_list); @@ -429,7 +431,9 @@ static void cpufreq_notify_post_transition(struct cpufreq_policy *policy, void cpufreq_freq_transition_begin(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs) { +#ifdef CONFIG_SMP int cpu; +#endif /* * Catch double invocations of _begin() which lead to self-deadlock. @@ -458,8 +462,10 @@ void cpufreq_freq_transition_begin(struct cpufreq_policy *policy, spin_unlock(&policy->transition_lock); scale_freq_capacity(policy, freqs); +#ifdef CONFIG_SMP for_each_cpu(cpu, policy->cpus) trace_cpu_capacity(capacity_curr_of(cpu), cpu); +#endif cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE); } -- GitLab From be65fb01da4d2062796e1d6acd18e1724c188360 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 2 Feb 2017 15:00:09 +0530 Subject: [PATCH 1311/1442] ANDROID: net: ipv6: remove unused variable ifindex in Remove unused variable "ifindex" in rt6_get_route_info(). "ifindex" is not used anymore, dev->ifindex is used directly instead. Signed-off-by: Amit Pundir --- net/ipv6/route.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 89ab3dc9dbe2..b245f6fa4ecf 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2341,7 +2341,6 @@ static struct rt6_info *rt6_get_route_info(struct net *net, struct net_device *dev) { u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO); - int ifindex = dev->ifindex; struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; -- GitLab From 057393410d25885c2dbeda7f4eee87258796f240 Mon Sep 17 00:00:00 2001 From: Steve Muckle Date: Wed, 4 May 2016 18:56:45 -0700 Subject: [PATCH 1312/1442] ANDROID: arm: Fix build error "conflicting types for 'scale_cpu_capacity'" Commit "arm: Update arch_scale_cpu_capacity() to reflect change to define" introduced a dependency on struct sched_domain in arch/arm/include/asm/topologoy.h, but that structure is only currently defined if CONFIG_CPU_FREQ is enabled, which causes include/linux/cpufreq.h to get pulled in which defines it. Include regardless of CONFIG_CPU_FREQ so struct sched_domain is always defined. Change-Id: I07bdfa6085f2322afe8be3c30c7c3d5a074ff5a7 Fixes: Change-Id: I372bd5e4c1e203428d72b18c8a806b06f3567ef6 ("arm: Update arch_scale_cpu_capacity() to reflect change to define") Signed-off-by: Steve Muckle Signed-off-by: Amit Pundir [jstultz: Cherry-picked from android-3.18] Signed-off-by: John Stultz --- arch/arm/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index e3e596cbb1a7..d06064120694 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -3,6 +3,7 @@ #ifdef CONFIG_ARM_CPU_TOPOLOGY +#include #include struct cputopo_arm { @@ -25,7 +26,6 @@ void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); #ifdef CONFIG_CPU_FREQ -#include #define arch_scale_freq_capacity cpufreq_scale_freq_capacity #endif #define arch_scale_cpu_capacity scale_cpu_capacity -- GitLab From aa9ea8421abee6f350a1e6fb6eb91c35159e628f Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 2 Jun 2016 12:18:08 +0000 Subject: [PATCH 1313/1442] ANDROID: arm: Fix #if/#ifdef typo in topology.c Probably a typo in arch/arm/kernel/topology.c This patch fixes the warning... arch/arm/kernel/topology.c: In function 'scale_cpu_capacity': arch/arm/kernel/topology.c:47:5: warning: "CONFIG_CPU_FREQ" is not defined [-Wundef] Change-Id: I398e5271d0a30f12c3a3fdc12a90829ac612734e Fixes: Change-Id: If5e9e0ba8ff5a5d3236b373dbce8c72ea71b5e18 ("arm: Enable max freq invariant scheduler load-tracking and capacity support") Signed-off-by: Jon Medhurst Signed-off-by: Amit Pundir [jstultz: Cherry-picked from android-3.18] Signed-off-by: John Stultz --- arch/arm/kernel/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 607b2354adb5..df3020071f32 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -44,7 +44,7 @@ static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) { -#if CONFIG_CPU_FREQ +#ifdef CONFIG_CPU_FREQ unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu); return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT; -- GitLab From 12e056cd56f60edbcdf7682a77acf098ca0aa94f Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Thu, 8 Dec 2016 14:02:10 +0800 Subject: [PATCH 1314/1442] ANDROID: sched: fix wrong truncation of walt_avg The result of "__entry->walt_avg = (__entry->demand << 10)" will exceed the range of "unsigned int", which will be truncated and make the trace looks like as follows: UnityMain-4588 [004] 6029.645672: walt_update_history: 4588(UnityMain): runtime 9928307 samples 1 event 4 demand 9928307 walt 157 pelt 870 (hist: 9928307 9604307 8440077 87392 34144328) cpu 4 UnityMain-4588 [004] 6029.653658: walt_update_history: 4588(UnityMain): runtime 10000000 samples 1 event 4 demand 10000000 walt 165 pelt 886 (hist: 10000000 9955691 6549308 64000 34144328) cpu 4 Fix this by using a u64 type instead of unsgined int type and make the trace as below: UnityMain-4617 [004] 117.613558: walt_update_history: 4617(UnityMain): runtime 5770597 samples 1 event 4 demand 7038739 walt 720 pelt 680 (hist: 5770597 7680001 8904509 65596 156) cpu 4 UnityMain-4617 [004] 117.633560: walt_update_history: 4617(UnityMain): runtime 9911238 samples 1 event 4 demand 9911238 walt 1014 pelt 769 (hist: 9911238 5770597 7680001 0 1664188058) cpu 4 Signed-off-by: Ke Wang [AmitP: cherry-picked from aosp/android-3.18] Signed-off-by: Amit Pundir --- include/trace/events/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index a1edd59ad81f..270075c802d7 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -1019,7 +1019,7 @@ TRACE_EVENT(walt_update_history, __field( int, samples ) __field( int, evt ) __field( u64, demand ) - __field(unsigned int, walt_avg ) + __field( u64, walt_avg ) __field(unsigned int, pelt_avg ) __array( u32, hist, RAVG_HIST_SIZE_MAX) __field( int, cpu ) @@ -1040,7 +1040,7 @@ TRACE_EVENT(walt_update_history, ), TP_printk("%d (%s): runtime %u samples %d event %d demand %llu" - " walt %u pelt %u (hist: %u %u %u %u %u) cpu %d", + " walt %llu pelt %u (hist: %u %u %u %u %u) cpu %d", __entry->pid, __entry->comm, __entry->runtime, __entry->samples, __entry->evt, __entry->demand, -- GitLab From 102f7f4c456f0ed7f0f42f5c67b9bef062811099 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Wed, 24 Aug 2016 11:52:17 +0530 Subject: [PATCH 1315/1442] ANDROID: sched/walt: use do_div instead of division operator Use do_div() instead of "/" operator to fix, undefined references to "__aeabi_uldivmod" or "__udivdi3", build errors for 32bit ARCHs. Also in TP_fast_assign(), along with do_div() usage, replace "," with ";" which would have resulted in a syntax error (!), because '#define TP_fast_assign(args...) args' would have stripped off the "," and left white space between these two assignments after CPP phase. Change-Id: I095f9cfb4dd9d58ef20cbb9c58b0711be6df9da3 Signed-off-by: Amit Pundir --- include/trace/events/sched.h | 3 ++- kernel/sched/sched.h | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 270075c802d7..d9169f9d783a 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -1032,7 +1032,8 @@ TRACE_EVENT(walt_update_history, __entry->samples = samples; __entry->evt = evt; __entry->demand = p->ravg.demand; - __entry->walt_avg = (__entry->demand << 10) / walt_ravg_window, + __entry->walt_avg = (__entry->demand << 10); + do_div(__entry->walt_avg, walt_ravg_window); __entry->pelt_avg = p->se.avg.util_avg; memcpy(__entry->hist, p->ravg.sum_history, RAVG_HIST_SIZE_MAX * sizeof(u32)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d2cdc3aaf9d9..e9ed87ea20eb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1640,9 +1640,10 @@ static inline unsigned long __cpu_util(int cpu, int delta) unsigned long capacity = capacity_orig_of(cpu); #ifdef CONFIG_SCHED_WALT - if (!walt_disabled && sysctl_sched_use_walt_cpu_util) - util = (cpu_rq(cpu)->prev_runnable_sum << SCHED_CAPACITY_SHIFT) / - walt_ravg_window; + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { + util = cpu_rq(cpu)->prev_runnable_sum << SCHED_CAPACITY_SHIFT; + do_div(util, walt_ravg_window); + } #endif delta += util; if (delta < 0) -- GitLab From dc89d136bd5069473bdf59419cce85558dd15c2d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 13 Dec 2016 10:33:34 -0800 Subject: [PATCH 1316/1442] FROMLIST: 9p: fix a potential acl leak (https://lkml.org/lkml/2016/12/13/579) posix_acl_update_mode() could possibly clear 'acl', if so we leak the memory pointed by 'acl'. Save this pointer before calling posix_acl_update_mode() and release the memory if 'acl' really gets cleared. Reported-by: Mark Salyzyn Reviewed-by: Jan Kara Reviewed-by: Greg Kurz Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Cong Wang Bug: 32458736 Change-Id: Ia78da401e6fd1bfd569653bd2cd0ebd3f9c737a0 --- fs/9p/acl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/9p/acl.c b/fs/9p/acl.c index b3c2cc79c20d..082d227fa56b 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -277,6 +277,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, case ACL_TYPE_ACCESS: if (acl) { struct iattr iattr; + struct posix_acl *old_acl = acl; retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl); if (retval) @@ -287,6 +288,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, * by the mode bits. So don't * update ACL. */ + posix_acl_release(old_acl); value = NULL; size = 0; } -- GitLab From a37f2311e63c8989675e297cdc9300cbec6e2217 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Wed, 11 Jan 2017 16:25:34 +0800 Subject: [PATCH 1317/1442] r8152: fix the sw rx checksum is unavailable [ Upstream commit 19c0f40d4fca3a47b8f784a627f0467f0138ccc8 ] Fix the hw rx checksum is always enabled, and the user couldn't switch it to sw rx checksum. Note that the RTL_VER_01 only support sw rx checksum only. Besides, the hw rx checksum for RTL_VER_02 is disabled after commit b9a321b48af4 ("r8152: Fix broken RX checksums."). Re-enable it. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4b5cb162442b..be4c86faba4f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) u8 checksum = CHECKSUM_NONE; u32 opts2, opts3; - if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02) + if (!(tp->netdev->features & NETIF_F_RXCSUM)) goto return_result; opts2 = le32_to_cpu(rx_desc->opts2); @@ -4358,6 +4358,11 @@ static int rtl8152_probe(struct usb_interface *intf, NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM | NETIF_F_TSO6; + if (tp->version == RTL_VER_01) { + netdev->features &= ~NETIF_F_RXCSUM; + netdev->hw_features &= ~NETIF_F_RXCSUM; + } + netdev->ethtool_ops = &ops; netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); -- GitLab From 5b3df4401064ef94cddac976aba1474a7050fa5e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 11 Jan 2017 09:16:32 -0800 Subject: [PATCH 1318/1442] netvsc: add rcu_read locking to netvsc callback [ Upstream commit 0719e72ccb801829a3d735d187ca8417f0930459 ] The receive callback (in tasklet context) is using RCU to get reference to associated VF network device but this is not safe. RCU read lock needs to be held. Found by running with full lockdep debugging enabled. Fixes: f207c10d9823 ("hv_netvsc: use RCU to protect vf_netdev") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c9140c3aeb67..ff038e507fd6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -659,6 +659,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, * policy filters on the host). Deliver these via the VF * interface in the guest. */ + rcu_read_lock(); vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); if (vf_netdev && (vf_netdev->flags & IFF_UP)) net = vf_netdev; @@ -667,6 +668,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); if (unlikely(!skb)) { ++net->stats.rx_dropped; + rcu_read_unlock(); return NVSP_STAT_FAIL; } @@ -696,6 +698,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, * TODO - use NAPI? */ netif_rx(skb); + rcu_read_unlock(); return 0; } -- GitLab From 7c249f3306b6f42f7a0a9d9cbd0455035f8a067c Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 12 Jan 2017 09:10:37 +0100 Subject: [PATCH 1319/1442] mlxsw: spectrum: Fix memory leak at skb reallocation [ Upstream commit 36bf38d158d3482119b3e159c0619b3c1539b508 ] During transmission the skb is checked for headroom in order to add vendor specific header. In case the skb needs to be re-allocated, skb_realloc_headroom() is called to make a private copy of the original, but doesn't release it. Current code assumes that the original skb is released during reallocation and only releases it at the error path which causes a memory leak. Fix this by adding the original skb release to the main path. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Arkadi Sharshevsky Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index dda5761e91bc..f902c4d3de99 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -684,6 +684,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, dev_kfree_skb_any(skb_orig); return NETDEV_TX_OK; } + dev_consume_skb_any(skb_orig); } if (eth_skb_pad(skb)) { -- GitLab From 4ec59d1fe470bad37dc0d388f65c150ccce8ea11 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 12 Jan 2017 09:10:38 +0100 Subject: [PATCH 1320/1442] mlxsw: switchx2: Fix memory leak at skb reallocation [ Upstream commit 400fc0106dd8c27ed84781c929c1a184785b9c79 ] During transmission the skb is checked for headroom in order to add vendor specific header. In case the skb needs to be re-allocated, skb_realloc_headroom() is called to make a private copy of the original, but doesn't release it. Current code assumes that the original skb is released during reallocation and only releases it at the error path which causes a memory leak. Fix this by adding the original skb release to the main path. Fixes: d003462a50de ("mlxsw: Simplify mlxsw_sx_port_xmit function") Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 92bda8703f87..d548f0a55174 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -314,6 +314,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, dev_kfree_skb_any(skb_orig); return NETDEV_TX_OK; } + dev_consume_skb_any(skb_orig); } mlxsw_sx_txhdr_construct(skb, &tx_info); /* TX header is consumed by HW on the way so we shouldn't count its -- GitLab From ec1aa8d495326435d956af325391b775038daa08 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Thu, 12 Jan 2017 09:10:39 +0100 Subject: [PATCH 1321/1442] mlxsw: pci: Fix EQE structure definition [ Upstream commit 28e46a0f2e03ab4ed0e23cace1ea89a68c8c115b ] The event_data starts from address 0x00-0x0C and not from 0x08-0x014. This leads to duplication with other fields in the Event Queue Element such as sub-type, cqn and owner. Fixes: eda6500a987a0 ("mlxsw: Add PCI bus implementation") Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/pci.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index d942a3e6fa41..846fd4df7dab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -211,21 +211,21 @@ MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1); /* pci_eqe_cmd_token * Command completion event - token */ -MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16); +MLXSW_ITEM32(pci, eqe, cmd_token, 0x00, 16, 16); /* pci_eqe_cmd_status * Command completion event - status */ -MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8); +MLXSW_ITEM32(pci, eqe, cmd_status, 0x00, 0, 8); /* pci_eqe_cmd_out_param_h * Command completion event - output parameter - higher part */ -MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32); +MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x04, 0, 32); /* pci_eqe_cmd_out_param_l * Command completion event - output parameter - lower part */ -MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32); +MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x08, 0, 32); #endif -- GitLab From 6980c52c4efb951c972409ebd146cfc348144918 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 11 Jan 2017 14:29:54 -0800 Subject: [PATCH 1322/1442] net: lwtunnel: Handle lwtunnel_fill_encap failure [ Upstream commit ea7a80858f57d8878b1499ea0f1b8a635cc48de7 ] Handle failure in lwtunnel_fill_encap adding attributes to skb. Fixes: 571e722676fe ("ipv4: support for fib route lwtunnel encap attributes") Fixes: 19e42e451506 ("ipv6: support for fib route lwtunnel encap attributes") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 11 +++++++---- net/ipv6/route.c | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a8508b79b406..6a4068031aaa 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1278,8 +1278,9 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) goto nla_put_failure; #endif - if (fi->fib_nh->nh_lwtstate) - lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate); + if (fi->fib_nh->nh_lwtstate && + lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) + goto nla_put_failure; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { @@ -1315,8 +1316,10 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif - if (nh->nh_lwtstate) - lwtunnel_fill_encap(skb, nh->nh_lwtstate); + if (nh->nh_lwtstate && + lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) + goto nla_put_failure; + /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1b57e11e6e0d..acd8023763cb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3306,7 +3306,8 @@ static int rt6_fill_node(struct net *net, if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) goto nla_put_failure; - lwtunnel_fill_encap(skb, rt->dst.lwtstate); + if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; -- GitLab From 958bb1bdc2c272fb63f31510d00579f59acb7a06 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 11 Jan 2017 15:42:17 -0800 Subject: [PATCH 1323/1442] net: ipv4: fix table id in getroute response [ Upstream commit 8a430ed50bb1b19ca14a46661f3b1b35f2fb5c39 ] rtm_table is an 8-bit field while table ids are allowed up to u32. Commit 709772e6e065 ("net: Fix routing tables with id > 255 for legacy software") added the preference to set rtm_table in dumps to RT_TABLE_COMPAT if the table id is > 255. The table id returned on get route requests should do the same. Fixes: c36ba6603a11 ("net: Allow user to get table id from route lookup") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8197b06d9aaa..d851cae27dac 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2440,7 +2440,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4->flowi4_tos; - r->rtm_table = table_id; + r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; -- GitLab From b66b1f5ac33d20d12fb56b30a1dac8d79c8a0038 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 12 Jan 2017 12:09:09 -0800 Subject: [PATCH 1324/1442] net: systemport: Decouple flow control from __bcm_sysport_tx_reclaim [ Upstream commit 148d3d021cf9724fcf189ce4e525a094bbf5ce89 ] The __bcm_sysport_tx_reclaim() function is used to reclaim transmit resources in different places within the driver. Most of them should not affect the state of the transit flow control. Introduce bcm_sysport_tx_clean() which cleans the ring, but does not re-enable flow control towards the networking stack, and make bcm_sysport_tx_reclaim() do the actual transmit queue flow control. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bcmsysport.c | 25 ++++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 25d1eb4933d0..be7ec5a76a54 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -710,11 +710,8 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs; unsigned int pkts_compl = 0, bytes_compl = 0; struct bcm_sysport_cb *cb; - struct netdev_queue *txq; u32 hw_ind; - txq = netdev_get_tx_queue(ndev, ring->index); - /* Compute how many descriptors have been processed since last call */ hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index)); c_index = (hw_ind >> RING_CONS_INDEX_SHIFT) & RING_CONS_INDEX_MASK; @@ -745,9 +742,6 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, ring->c_index = c_index; - if (netif_tx_queue_stopped(txq) && pkts_compl) - netif_tx_wake_queue(txq); - netif_dbg(priv, tx_done, ndev, "ring=%d c_index=%d pkts_compl=%d, bytes_compl=%d\n", ring->index, ring->c_index, pkts_compl, bytes_compl); @@ -759,16 +753,33 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, static unsigned int bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, struct bcm_sysport_tx_ring *ring) { + struct netdev_queue *txq; unsigned int released; unsigned long flags; + txq = netdev_get_tx_queue(priv->netdev, ring->index); + spin_lock_irqsave(&ring->lock, flags); released = __bcm_sysport_tx_reclaim(priv, ring); + if (released) + netif_tx_wake_queue(txq); + spin_unlock_irqrestore(&ring->lock, flags); return released; } +/* Locked version of the per-ring TX reclaim, but does not wake the queue */ +static void bcm_sysport_tx_clean(struct bcm_sysport_priv *priv, + struct bcm_sysport_tx_ring *ring) +{ + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + __bcm_sysport_tx_reclaim(priv, ring); + spin_unlock_irqrestore(&ring->lock, flags); +} + static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget) { struct bcm_sysport_tx_ring *ring = @@ -1253,7 +1264,7 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv, napi_disable(&ring->napi); netif_napi_del(&ring->napi); - bcm_sysport_tx_reclaim(priv, ring); + bcm_sysport_tx_clean(priv, ring); kfree(ring->cbs); ring->cbs = NULL; -- GitLab From 3524f64224bdef1c834e2952aaa72c175621e2e3 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Jan 2017 14:24:58 -0800 Subject: [PATCH 1325/1442] tcp: fix tcp_fastopen unaligned access complaints on sparc [ Upstream commit 003c941057eaa868ca6fedd29a274c863167230d ] Fix up a data alignment issue on sparc by swapping the order of the cookie byte array field with the length field in struct tcp_fastopen_cookie, and making it a proper union to clean up the typecasting. This addresses log complaints like these: log_unaligned: 113 callbacks suppressed Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 Kernel unaligned access at TPC[9764ac] tcp_try_fastopen+0x2ec/0x360 Kernel unaligned access at TPC[9764c8] tcp_try_fastopen+0x308/0x360 Kernel unaligned access at TPC[9764e4] tcp_try_fastopen+0x324/0x360 Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 Cc: Eric Dumazet Signed-off-by: Shannon Nelson Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/tcp.h | 7 ++++++- net/ipv4/tcp_fastopen.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a17ae7b85218..647532b0eb03 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -62,8 +62,13 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { + union { + u8 val[TCP_FASTOPEN_COOKIE_MAX]; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr addr; +#endif + }; s8 len; - u8 val[TCP_FASTOPEN_COOKIE_MAX]; bool exp; /* In RFC6994 experimental option format */ }; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4e777a3243f9..f51919535ca7 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, struct tcp_fastopen_cookie tmp; if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { - struct in6_addr *buf = (struct in6_addr *) tmp.val; + struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) -- GitLab From 18767acb7b67b27ec4355ca623cbaa209edc8f13 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Thu, 12 Jan 2017 19:33:18 -0500 Subject: [PATCH 1326/1442] openvswitch: maintain correct checksum state in conntrack actions [ Upstream commit 75f01a4c9cc291ff5cb28ca1216adb163b7a20ee ] When executing conntrack actions on skbuffs with checksum mode CHECKSUM_COMPLETE, the checksum must be updated to account for header pushes and pulls. Otherwise we get "hw csum failure" logs similar to this (ICMP packet received on geneve tunnel via ixgbe NIC): [ 405.740065] genev_sys_6081: hw csum failure [ 405.740106] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G I 4.10.0-rc3+ #1 [ 405.740108] Call Trace: [ 405.740110] [ 405.740113] dump_stack+0x63/0x87 [ 405.740116] netdev_rx_csum_fault+0x3a/0x40 [ 405.740118] __skb_checksum_complete+0xcf/0xe0 [ 405.740120] nf_ip_checksum+0xc8/0xf0 [ 405.740124] icmp_error+0x1de/0x351 [nf_conntrack_ipv4] [ 405.740132] nf_conntrack_in+0xe1/0x550 [nf_conntrack] [ 405.740137] ? find_bucket.isra.2+0x62/0x70 [openvswitch] [ 405.740143] __ovs_ct_lookup+0x95/0x980 [openvswitch] [ 405.740145] ? netif_rx_internal+0x44/0x110 [ 405.740149] ovs_ct_execute+0x147/0x4b0 [openvswitch] [ 405.740153] do_execute_actions+0x22e/0xa70 [openvswitch] [ 405.740157] ovs_execute_actions+0x40/0x120 [openvswitch] [ 405.740161] ovs_dp_process_packet+0x84/0x120 [openvswitch] [ 405.740166] ovs_vport_receive+0x73/0xd0 [openvswitch] [ 405.740168] ? udp_rcv+0x1a/0x20 [ 405.740170] ? ip_local_deliver_finish+0x93/0x1e0 [ 405.740172] ? ip_local_deliver+0x6f/0xe0 [ 405.740174] ? ip_rcv_finish+0x3a0/0x3a0 [ 405.740176] ? ip_rcv_finish+0xdb/0x3a0 [ 405.740177] ? ip_rcv+0x2a7/0x400 [ 405.740180] ? __netif_receive_skb_core+0x970/0xa00 [ 405.740185] netdev_frame_hook+0xd3/0x160 [openvswitch] [ 405.740187] __netif_receive_skb_core+0x1dc/0xa00 [ 405.740194] ? ixgbe_clean_rx_irq+0x46d/0xa20 [ixgbe] [ 405.740197] __netif_receive_skb+0x18/0x60 [ 405.740199] netif_receive_skb_internal+0x40/0xb0 [ 405.740201] napi_gro_receive+0xcd/0x120 [ 405.740204] gro_cell_poll+0x57/0x80 [geneve] [ 405.740206] net_rx_action+0x260/0x3c0 [ 405.740209] __do_softirq+0xc9/0x28c [ 405.740211] irq_exit+0xd9/0xf0 [ 405.740213] do_IRQ+0x51/0xd0 [ 405.740215] common_interrupt+0x93/0x93 Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Signed-off-by: Lance Richardson Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/conntrack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index fecefa2dc94e..eab210bb1ef0 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -514,7 +514,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, int hooknum, nh_off, err = NF_ACCEPT; nh_off = skb_network_offset(skb); - skb_pull(skb, nh_off); + skb_pull_rcsum(skb, nh_off); /* See HOOK2MANIP(). */ if (maniptype == NF_NAT_MANIP_SRC) @@ -579,6 +579,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, err = nf_nat_packet(ct, ctinfo, hooknum, skb); push: skb_push(skb, nh_off); + skb_postpush_rcsum(skb, skb->data, nh_off); return err; } @@ -890,7 +891,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, /* The conntrack module expects to be working at L3. */ nh_ofs = skb_network_offset(skb); - skb_pull(skb, nh_ofs); + skb_pull_rcsum(skb, nh_ofs); if (key->ip.frag != OVS_FRAG_TYPE_NONE) { err = handle_fragments(net, key, info->zone.id, skb); @@ -904,6 +905,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, err = ovs_ct_lookup(net, key, info, skb); skb_push(skb, nh_ofs); + skb_postpush_rcsum(skb, skb->data, nh_ofs); if (err) kfree_skb(skb); return err; -- GitLab From 77ce30dc4df48696a5b3bf8512cc2d38918116c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Jan 2017 08:39:24 -0800 Subject: [PATCH 1327/1442] mlx4: do not call napi_schedule() without care [ Upstream commit 8cf699ec849f4ca1413cea01289bd7d37dbcc626 ] Disable BH around the call to napi_schedule() to avoid following warning [ 52.095499] NOHZ: local_softirq_pending 08 [ 52.421291] NOHZ: local_softirq_pending 08 [ 52.608313] NOHZ: local_softirq_pending 08 Fixes: 8d59de8f7bb3 ("net/mlx4_en: Process all completions in RX rings after port goes up") Signed-off-by: Eric Dumazet Cc: Erez Shitrit Cc: Eugenia Emantayev Cc: Tariq Toukan Acked-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fb8bb027b69c..d223e7cb68ba 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1740,8 +1740,11 @@ int mlx4_en_start_port(struct net_device *dev) /* Process all completions if exist to prevent * the queues freezing if they are full */ - for (i = 0; i < priv->rx_ring_num; i++) + for (i = 0; i < priv->rx_ring_num; i++) { + local_bh_disable(); napi_schedule(&priv->rx_cq[i]->napi); + local_bh_enable(); + } netif_tx_start_all_queues(dev); netif_device_attach(dev); -- GitLab From 08e650708dd094213d1601617dd71fb8b965802f Mon Sep 17 00:00:00 2001 From: Masaru Nagai Date: Mon, 16 Jan 2017 11:45:21 +0100 Subject: [PATCH 1328/1442] ravb: do not use zero-length alignment DMA descriptor [ Upstream commit 8ec3e8a192ba6f13be4522ee81227c792c86fb1a ] Due to alignment requirements of the hardware transmissions are split into two DMA descriptors, a small padding descriptor of 0 - 3 bytes in length followed by a descriptor for rest of the packet. In the case of IP packets the first descriptor will never be zero due to the way that the stack aligns buffers for IP packets. However, for non-IP packets it may be zero. In that case it has been reported that timeouts occur, presumably because transmission stops at the first zero-length DMA descriptor and thus the packet is not transmitted. However, in my environment a BUG is triggered as follows: [ 20.381417] ------------[ cut here ]------------ [ 20.386054] kernel BUG at lib/swiotlb.c:495! [ 20.390324] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 20.395805] Modules linked in: [ 20.398862] CPU: 0 PID: 2089 Comm: mz Not tainted 4.10.0-rc3-00001-gf13ad2db193f #162 [ 20.406689] Hardware name: Renesas Salvator-X board based on r8a7796 (DT) [ 20.413474] task: ffff80063b1f1900 task.stack: ffff80063a71c000 [ 20.419404] PC is at swiotlb_tbl_map_single+0x178/0x2ec [ 20.424625] LR is at map_single+0x4c/0x98 [ 20.428629] pc : [] lr : [] pstate: 800001c5 [ 20.436019] sp : ffff80063a71f9b0 [ 20.439327] x29: ffff80063a71f9b0 x28: ffff80063a20d500 [ 20.444636] x27: ffff000008ed5000 x26: 0000000000000000 [ 20.449944] x25: 000000067abe2adc x24: 0000000000000000 [ 20.455252] x23: 0000000000200000 x22: 0000000000000001 [ 20.460559] x21: 0000000000175ffe x20: ffff80063b2a0010 [ 20.465866] x19: 0000000000000000 x18: 0000ffffcae6fb20 [ 20.471173] x17: 0000ffffa09ba018 x16: ffff0000087c8b70 [ 20.476480] x15: 0000ffffa084f588 x14: 0000ffffa09cfa14 [ 20.481787] x13: 0000ffffcae87ff0 x12: 000000000063abe2 [ 20.487098] x11: ffff000008096360 x10: ffff80063abe2adc [ 20.492407] x9 : 0000000000000000 x8 : 0000000000000000 [ 20.497718] x7 : 0000000000000000 x6 : ffff000008ed50d0 [ 20.503028] x5 : 0000000000000000 x4 : 0000000000000001 [ 20.508338] x3 : 0000000000000000 x2 : 000000067abe2adc [ 20.513648] x1 : 00000000bafff000 x0 : 0000000000000000 [ 20.518958] [ 20.520446] Process mz (pid: 2089, stack limit = 0xffff80063a71c000) [ 20.526798] Stack: (0xffff80063a71f9b0 to 0xffff80063a720000) [ 20.532543] f9a0: ffff80063a71fa30 ffff00000839c680 [ 20.540374] f9c0: ffff80063b2a0010 ffff80063b2a0010 0000000000000001 0000000000000000 [ 20.548204] f9e0: 000000000000006e ffff80063b23c000 ffff80063b23c000 0000000000000000 [ 20.556034] fa00: ffff80063b23c000 ffff80063a20d500 000000013b1f1900 0000000000000000 [ 20.563864] fa20: ffff80063ffd18e0 ffff80063b2a0010 ffff80063a71fa60 ffff00000839cd10 [ 20.571694] fa40: ffff80063b2a0010 0000000000000000 ffff80063ffd18e0 000000067abe2adc [ 20.579524] fa60: ffff80063a71fa90 ffff000008096380 ffff80063b2a0010 0000000000000000 [ 20.587353] fa80: 0000000000000000 0000000000000001 ffff80063a71fac0 ffff00000864f770 [ 20.595184] faa0: ffff80063b23caf0 0000000000000000 0000000000000000 0000000000000140 [ 20.603014] fac0: ffff80063a71fb60 ffff0000087e6498 ffff80063a20d500 ffff80063b23c000 [ 20.610843] fae0: 0000000000000000 ffff000008daeaf0 0000000000000000 ffff000008daeb00 [ 20.618673] fb00: ffff80063a71fc0c ffff000008da7000 ffff80063b23c090 ffff80063a44f000 [ 20.626503] fb20: 0000000000000000 ffff000008daeb00 ffff80063a71fc0c ffff000008da7000 [ 20.634333] fb40: ffff80063b23c090 0000000000000000 ffff800600000037 ffff0000087e63d8 [ 20.642163] fb60: ffff80063a71fbc0 ffff000008807510 ffff80063a692400 ffff80063a20d500 [ 20.649993] fb80: ffff80063a44f000 ffff80063b23c000 ffff80063a69249c 0000000000000000 [ 20.657823] fba0: 0000000000000000 ffff80063a087800 ffff80063b23c000 ffff80063a20d500 [ 20.665653] fbc0: ffff80063a71fc10 ffff0000087e67dc ffff80063a20d500 ffff80063a692400 [ 20.673483] fbe0: ffff80063b23c000 0000000000000000 ffff80063a44f000 ffff80063a69249c [ 20.681312] fc00: ffff80063a5f1a10 000000103a087800 ffff80063a71fc70 ffff0000087e6b24 [ 20.689142] fc20: ffff80063a5f1a80 ffff80063a71fde8 000000000000000f 00000000000005ea [ 20.696972] fc40: ffff80063a5f1a10 0000000000000000 000000000000000f ffff00000887fbd0 [ 20.704802] fc60: fffffff43a5f1a80 0000000000000000 ffff80063a71fc80 ffff000008880240 [ 20.712632] fc80: ffff80063a71fd90 ffff0000087c7a34 ffff80063afc7180 0000000000000000 [ 20.720462] fca0: 0000ffffcae6fe18 0000000000000014 0000000060000000 0000000000000015 [ 20.728292] fcc0: 0000000000000123 00000000000000ce ffff0000088d2000 ffff80063b1f1900 [ 20.736122] fce0: 0000000000008933 ffff000008e7cb80 ffff80063a71fd80 ffff0000087c50a4 [ 20.743951] fd00: 0000000000008933 ffff000008e7cb80 ffff000008e7cb80 000000100000000e [ 20.751781] fd20: ffff80063a71fe4c 0000ffff00000300 0000000000000123 0000000000000000 [ 20.759611] fd40: 0000000000000000 ffff80063b1f0000 000000000000000e 0000000000000300 [ 20.767441] fd60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 20.775271] fd80: 0000000000000000 0000000000000000 ffff80063a71fda0 ffff0000087c8c20 [ 20.783100] fda0: 0000000000000000 ffff000008082f30 0000000000000000 0000800637260000 [ 20.790930] fdc0: ffffffffffffffff 0000ffffa0903078 0000000000000000 000000001ea87232 [ 20.798760] fde0: 000000000000000f ffff80063a71fe40 ffff800600000014 ffff000000000001 [ 20.806590] fe00: 0000000000000000 0000000000000000 ffff80063a71fde8 0000000000000000 [ 20.814420] fe20: 0000000000000000 0000000000000000 0000000000000000 0000000000000001 [ 20.822249] fe40: 0000000203000011 0000000000000000 0000000000000000 ffff80063a68aa00 [ 20.830079] fe60: ffff80063a68aa00 0000000000000003 0000000000008933 ffff0000081f1b9c [ 20.837909] fe80: 0000000000000000 ffff000008082f30 0000000000000000 0000800637260000 [ 20.845739] fea0: ffffffffffffffff 0000ffffa07ca81c 0000000060000000 0000000000000015 [ 20.853569] fec0: 0000000000000003 000000001ea87232 000000000000000f 0000000000000000 [ 20.861399] fee0: 0000ffffcae6fe18 0000000000000014 0000000000000300 0000000000000000 [ 20.869228] ff00: 00000000000000ce 0000000000000000 00000000ffffffff 0000000000000000 [ 20.877059] ff20: 0000000000000002 0000ffffcae87ff0 0000ffffa09cfa14 0000ffffa084f588 [ 20.884888] ff40: 0000000000000000 0000ffffa09ba018 0000ffffcae6fb20 000000001ea87010 [ 20.892718] ff60: 0000ffffa09b9000 0000ffffcae6fe30 0000ffffcae6fe18 000000000000000f [ 20.900548] ff80: 0000000000000003 000000001ea87232 0000000000000000 0000000000000000 [ 20.908378] ffa0: 0000000000000000 0000ffffcae6fdc0 0000ffffa09a7824 0000ffffcae6fdc0 [ 20.916208] ffc0: 0000ffffa0903078 0000000060000000 0000000000000003 00000000000000ce [ 20.924038] ffe0: 0000000000000000 0000000000000000 ffffffffffffffff ffffffffffffffff [ 20.931867] Call trace: [ 20.934312] Exception stack(0xffff80063a71f7e0 to 0xffff80063a71f910) [ 20.940750] f7e0: 0000000000000000 0001000000000000 ffff80063a71f9b0 ffff00000839c4c0 [ 20.948580] f800: ffff80063a71f840 ffff00000888a6e4 ffff80063a24c418 ffff80063a24c448 [ 20.956410] f820: 0000000000000000 ffff00000811cd54 ffff80063a71f860 ffff80063a24c458 [ 20.964240] f840: ffff80063a71f870 ffff00000888b258 ffff80063a24c418 0000000000000001 [ 20.972070] f860: ffff80063a71f910 ffff80063a7b7028 ffff80063a71f890 ffff0000088825e4 [ 20.979899] f880: 0000000000000000 00000000bafff000 000000067abe2adc 0000000000000000 [ 20.987729] f8a0: 0000000000000001 0000000000000000 ffff000008ed50d0 0000000000000000 [ 20.995560] f8c0: 0000000000000000 0000000000000000 ffff80063abe2adc ffff000008096360 [ 21.003390] f8e0: 000000000063abe2 0000ffffcae87ff0 0000ffffa09cfa14 0000ffffa084f588 [ 21.011219] f900: ffff0000087c8b70 0000ffffa09ba018 [ 21.016097] [] swiotlb_tbl_map_single+0x178/0x2ec [ 21.022362] [] map_single+0x4c/0x98 [ 21.027411] [] swiotlb_map_page+0xa4/0x138 [ 21.033072] [] __swiotlb_map_page+0x20/0x7c [ 21.038821] [] ravb_start_xmit+0x174/0x668 [ 21.044484] [] dev_hard_start_xmit+0x8c/0x120 [ 21.050407] [] sch_direct_xmit+0x108/0x1a0 [ 21.056064] [] __dev_queue_xmit+0x194/0x4cc [ 21.061807] [] dev_queue_xmit+0x10/0x18 [ 21.067214] [] packet_sendmsg+0xf40/0x1220 [ 21.072873] [] sock_sendmsg+0x18/0x2c [ 21.078097] [] SyS_sendto+0xb0/0xf0 [ 21.083150] [] el0_svc_naked+0x24/0x28 [ 21.088462] Code: d34bfef7 2a1803f3 1a9f86d6 35fff878 (d4210000) [ 21.094611] ---[ end trace 5bc544ad491f3814 ]--- [ 21.099234] Kernel panic - not syncing: Fatal exception in interrupt [ 21.105587] Kernel Offset: disabled [ 21.109073] Memory Limit: none [ 21.112126] ---[ end Kernel panic - not syncing: Fatal exception in interrupt Fixes: 2f45d1902acf ("ravb: minimize TX data copying") Signed-off-by: Masaru Nagai Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index d6a217874a8b..862f18ed6022 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1508,6 +1508,19 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) buffer = PTR_ALIGN(priv->tx_align[q], DPTR_ALIGN) + entry / NUM_TX_DESC * DPTR_ALIGN; len = PTR_ALIGN(skb->data, DPTR_ALIGN) - skb->data; + /* Zero length DMA descriptors are problematic as they seem to + * terminate DMA transfers. Avoid them by simply using a length of + * DPTR_ALIGN (4) when skb data is aligned to DPTR_ALIGN. + * + * As skb is guaranteed to have at least ETH_ZLEN (60) bytes of + * data by the call to skb_put_padto() above this is safe with + * respect to both the length of the first DMA descriptor (len) + * overflowing the available data and the length of the second DMA + * descriptor (skb->len - len) being negative. + */ + if (len == 0) + len = DPTR_ALIGN; + memcpy(buffer, skb->data, len); dma_addr = dma_map_single(ndev->dev.parent, buffer, len, DMA_TO_DEVICE); if (dma_mapping_error(ndev->dev.parent, dma_addr)) -- GitLab From c7a5df92d1e87884a170c0245e92ac6c8f9ec4d7 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 13 Jan 2017 10:12:20 +0100 Subject: [PATCH 1329/1442] ip6_tunnel: Account for tunnel header in tunnel MTU [ Upstream commit 02ca0423fd65a0a9c4d70da0dbb8f4b8503f08c7 ] With ip6gre we have a tunnel header which also makes the tunnel MTU smaller. We need to reserve room for it. Previously we were using up space reserved for the Tunnel Encapsulation Limit option header (RFC 2473). Also, after commit b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") our contract with the caller has changed. Now we check if the packet length exceeds the tunnel MTU after the tunnel header has been pushed, unlike before. This is reflected in the check where we look at the packet length minus the size of the tunnel header, which is already accounted for in tunnel MTU. Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d76674efe523..f95437f1087c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1108,7 +1108,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - psh_hlen; + mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1117,7 +1117,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, mtu = IPV6_MIN_MTU; if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu && !skb_is_gso(skb)) { + if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; -- GitLab From 2d6b61ec9d3aed5091d6df4703a079d109bef3c1 Mon Sep 17 00:00:00 2001 From: Basil Gunn Date: Sat, 14 Jan 2017 12:18:55 -0800 Subject: [PATCH 1330/1442] ax25: Fix segfault after sock connection timeout [ Upstream commit 8a367e74c0120ef68c8c70d5a025648c96626dff ] The ax.25 socket connection timed out & the sock struct has been previously taken down ie. sock struct is now a NULL pointer. Checking the sock_flag causes the segfault. Check if the socket struct pointer is NULL before checking sock_flag. This segfault is seen in timed out netrom connections. Please submit to -stable. Signed-off-by: Basil Gunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ax25/ax25_subr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 655a7d4c96e1..983f0b5e14f1 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,7 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - if (!sock_flag(ax25->sk, SOCK_DESTROY)) + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); -- GitLab From b260a714a638cabb72f3d386ecb66ec04e06fcce Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 15 Jan 2017 10:14:06 -0500 Subject: [PATCH 1331/1442] net sched actions: fix refcnt when GETing of action after bind [ Upstream commit 0faa9cb5b3836a979864a6357e01d2046884ad52 ] Demonstrating the issue: .. add a drop action $sudo $TC actions add action drop index 10 .. retrieve it $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 0 installed 29 sec used 29 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 ... bug 1 above: reference is two. Reference is actually 1 but we forget to subtract 1. ... do a GET again and we see the same issue try a few times and nothing changes ~$ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 0 installed 31 sec used 31 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 ... lets try to bind the action to a filter.. $ sudo $TC qdisc add dev lo ingress $ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10 ... and now a few GETs: $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 3 bind 1 installed 204 sec used 204 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 4 bind 1 installed 206 sec used 206 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 5 bind 1 installed 235 sec used 235 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 .... as can be observed the reference count keeps going up. After the fix $ sudo $TC actions add action drop index 10 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 1 bind 0 installed 4 sec used 4 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 1 bind 0 installed 6 sec used 6 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC qdisc add dev lo ingress $ sudo $TC filter add dev lo parent ffff: protocol ip prio 1 \ u32 match ip dst 127.0.0.1/32 flowid 1:1 action gact index 10 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 1 installed 32 sec used 32 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 $ sudo $TC -s actions get action gact index 10 action order 1: gact action drop random type none pass val 0 index 10 ref 2 bind 1 installed 33 sec used 33 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Fixes: aecc5cefc389 ("net sched actions: fix GETing actions") Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f893d180da1c..c6c2a93cc2a2 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -903,8 +903,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; - if (event == RTM_GETACTION) - act->tcfa_refcnt++; list_add_tail(&act->list, &actions); } @@ -917,7 +915,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, return ret; } err: - tcf_action_destroy(&actions, 0); + if (event != RTM_GETACTION) + tcf_action_destroy(&actions, 0); return ret; } -- GitLab From 3eab5dd0eb19b6ebed8ef7e7477d9f3048cc78fa Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Tue, 17 Jan 2017 18:13:51 +0000 Subject: [PATCH 1332/1442] virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit [ Upstream commit 501db511397fd6efff3aa5b4e8de415b55559550 ] This patch part reverts fd2a0437dc33 and e858fae2b0b8 which introduced a subtle change in how the virtio_net flags are derived from the SKBs ip_summed field. With the above commits, the flags are set to VIRTIO_NET_HDR_F_DATA_VALID when ip_summed == CHECKSUM_UNNECESSARY, thus treating it differently to ip_summed == CHECKSUM_NONE, which should be the same. Further, the virtio spec 1.0 / CS04 explicitly says that VIRTIO_NET_HDR_F_DATA_VALID must not be set by the driver. Fixes: fd2a0437dc33 ("virtio_net: introduce virtio_net_hdr_{from,to}_skb") Fixes: e858fae2b0b8 (" virtio_net: use common code for virtio_net_hdr and skb GSO conversion") Signed-off-by: Rolf Neugebauer Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/virtio_net.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 1c912f85e041..40914bb396e7 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -91,8 +91,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, skb_checksum_start_offset(skb)); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); - } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; -- GitLab From 1e7cbb413f63d8fa790c8dabc208ce2a02339c26 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 20 Jan 2017 14:32:42 +0800 Subject: [PATCH 1333/1442] virtio-net: restore VIRTIO_HDR_F_DATA_VALID on receiving [ Upstream commit 6391a4481ba0796805d6581e42f9f0418c099e34 ] Commit 501db511397f ("virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit") in fact disables VIRTIO_HDR_F_DATA_VALID on receiving path too, fixing this by adding a hint (has_data_valid) and set it only on the receiving path. Cc: Rolf Neugebauer Signed-off-by: Jason Wang Acked-by: Rolf Neugebauer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 2 +- drivers/net/tun.c | 2 +- drivers/net/virtio_net.c | 2 +- include/linux/virtio_net.h | 6 +++++- net/packet/af_packet.c | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 7869b0651576..6f38daf2d978 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -827,7 +827,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, return -EINVAL; ret = virtio_net_hdr_from_skb(skb, &vnet_hdr, - macvtap_is_little_endian(q)); + macvtap_is_little_endian(q), true); if (ret) BUG(); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index db6acecabeaa..18402d79539e 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1374,7 +1374,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, return -EINVAL; ret = virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun)); + tun_is_little_endian(tun), true); if (ret) { struct skb_shared_info *sinfo = skb_shinfo(skb); pr_err("unexpected GSO type: " diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index cbf1c613c67a..51fc0c33a62f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -840,7 +840,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) hdr = skb_vnet_hdr(skb); if (virtio_net_hdr_from_skb(skb, &hdr->hdr, - virtio_is_little_endian(vi->vdev))) + virtio_is_little_endian(vi->vdev), false)) BUG(); if (vi->mergeable_rx_bufs) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 40914bb396e7..f211c348e592 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, - bool little_endian) + bool little_endian, + bool has_data_valid) { memset(hdr, 0, sizeof(*hdr)); @@ -91,6 +92,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, skb_checksum_start_offset(skb)); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); + } else if (has_data_valid && + skb->ip_summed == CHECKSUM_UNNECESSARY) { + hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index dd2332390c45..94e4a5941d89 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1972,7 +1972,7 @@ static int __packet_rcv_vnet(const struct sk_buff *skb, { *vnet_hdr = (const struct virtio_net_hdr) { 0 }; - if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le())) + if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le(), true)) BUG(); return 0; -- GitLab From d1c95f9ce102e77875ee8febc73d2a217dd020a0 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Mon, 16 Jan 2017 18:37:58 -0500 Subject: [PATCH 1334/1442] vxlan: fix byte order of vxlan-gpe port number [ Upstream commit d5ff72d9af73bc3cbaa3edb541333a851f8c7295 ] vxlan->cfg.dst_port is in network byte order, so an htons() is needed here. Also reduced comment length to stay closer to 80 column width (still slightly over, however). Fixes: e1e5314de08b ("vxlan: implement GPE") Signed-off-by: Lance Richardson Acked-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2ba01ca02c9c..0fafaa9d903b 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2887,7 +2887,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, memcpy(&vxlan->cfg, conf, sizeof(*conf)); if (!vxlan->cfg.dst_port) { if (conf->flags & VXLAN_F_GPE) - vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */ + vxlan->cfg.dst_port = htons(4790); /* IANA VXLAN-GPE port */ else vxlan->cfg.dst_port = default_port; } -- GitLab From 948e137ad9ba0d4ff8ebc5ee994dc26fe3ebc4f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jan 2017 12:12:17 -0800 Subject: [PATCH 1335/1442] net: fix harmonize_features() vs NETIF_F_HIGHDMA [ Upstream commit 7be2c82cfd5d28d7adb66821a992604eb6dd112e ] Ashizuka reported a highmem oddity and sent a patch for freescale fec driver. But the problem root cause is that core networking stack must ensure no skb with highmem fragment is ever sent through a device that does not assert NETIF_F_HIGHDMA in its features. We need to call illegal_highdma() from harmonize_features() regardless of CSUM checks. Fixes: ec5f06156423 ("net: Kill link between CSUM and SG features.") Signed-off-by: Eric Dumazet Cc: Pravin Shelar Reported-by: "Ashizuka, Yuusuke" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index e1d731fdc72c..df51c50927ab 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2815,9 +2815,9 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); - } else if (illegal_highdma(skb->dev, skb)) { - features &= ~NETIF_F_SG; } + if (illegal_highdma(skb->dev, skb)) + features &= ~NETIF_F_SG; return features; } -- GitLab From b335e65666610c456a5fed734cdd04ec84639a52 Mon Sep 17 00:00:00 2001 From: Daniel Gonzalez Cabanelas Date: Tue, 17 Jan 2017 16:26:55 -0800 Subject: [PATCH 1336/1442] net: phy: bcm63xx: Utilize correct config_intr function [ Upstream commit cd33b3e0da43522ff8e8f2b2b71d3d08298512b0 ] Commit a1cba5613edf ("net: phy: Add Broadcom phy library for common interfaces") make the BCM63xx PHY driver utilize bcm_phy_config_intr() which would appear to do the right thing, except that it does not write to the MII_BCM63XX_IR register but to MII_BCM54XX_ECR which is different. This would be causing invalid link parameters and events from being generated by the PHY interrupt. Fixes: a1cba5613edf ("net: phy: Add Broadcom phy library for common interfaces") Signed-off-by: Daniel Gonzalez Cabanelas Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/bcm63xx.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index e741bf614c4e..b0492ef2cdaa 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -21,6 +21,23 @@ MODULE_DESCRIPTION("Broadcom 63xx internal PHY driver"); MODULE_AUTHOR("Maxime Bizon "); MODULE_LICENSE("GPL"); +static int bcm63xx_config_intr(struct phy_device *phydev) +{ + int reg, err; + + reg = phy_read(phydev, MII_BCM63XX_IR); + if (reg < 0) + return reg; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + reg &= ~MII_BCM63XX_IR_GMASK; + else + reg |= MII_BCM63XX_IR_GMASK; + + err = phy_write(phydev, MII_BCM63XX_IR, reg); + return err; +} + static int bcm63xx_config_init(struct phy_device *phydev) { int reg, err; @@ -55,7 +72,7 @@ static struct phy_driver bcm63xx_driver[] = { .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, - .config_intr = bcm_phy_config_intr, + .config_intr = bcm63xx_config_intr, }, { /* same phy as above, with just a different OUI */ .phy_id = 0x002bdc00, @@ -67,7 +84,7 @@ static struct phy_driver bcm63xx_driver[] = { .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, - .config_intr = bcm_phy_config_intr, + .config_intr = bcm63xx_config_intr, } }; module_phy_driver(bcm63xx_driver); -- GitLab From e9db042dca20f7b6c1969fa21b121dde01057a74 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Jan 2017 14:57:36 -0800 Subject: [PATCH 1337/1442] lwtunnel: fix autoload of lwt modules [ Upstream commit 9ed59592e3e379b2e9557dc1d9e9ec8fcbb33f16] Trying to add an mpls encap route when the MPLS modules are not loaded hangs. For example: CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m $ ip route add 10.10.10.10/32 encap mpls 100 via inet 10.100.1.2 The ip command hangs: root 880 826 0 21:25 pts/0 00:00:00 ip route add 10.10.10.10/32 encap mpls 100 via inet 10.100.1.2 $ cat /proc/880/stack [] call_usermodehelper_exec+0xd6/0x134 [] __request_module+0x27b/0x30a [] lwtunnel_build_state+0xe4/0x178 [] fib_create_info+0x47f/0xdd4 [] fib_table_insert+0x90/0x41f [] inet_rtm_newroute+0x4b/0x52 ... modprobe is trying to load rtnl-lwt-MPLS: root 881 5 0 21:25 ? 00:00:00 /sbin/modprobe -q -- rtnl-lwt-MPLS and it hangs after loading mpls_router: $ cat /proc/881/stack [] rtnl_lock+0x12/0x14 [] register_netdevice_notifier+0x16/0x179 [] mpls_init+0x25/0x1000 [mpls_router] [] do_one_initcall+0x8e/0x13f [] do_init_module+0x5a/0x1e5 [] load_module+0x13bd/0x17d6 ... The problem is that lwtunnel_build_state is called with rtnl lock held preventing mpls_init from registering. Given the potential references held by the time lwtunnel_build_state it can not drop the rtnl lock to the load module. So, extract the module loading code from lwtunnel_build_state into a new function to validate the encap type. The new function is called while converting the user request into a fib_config which is well before any table, device or fib entries are examined. Fixes: 745041e2aaf1 ("lwtunnel: autoload of lwt modules") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/lwtunnel.h | 11 ++++++++ net/core/lwtunnel.c | 62 +++++++++++++++++++++++++++++++++++++---- net/ipv4/fib_frontend.c | 8 ++++++ net/ipv6/route.c | 12 +++++++- 4 files changed, 86 insertions(+), 7 deletions(-) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index ea3f80f58fd6..95581ca69f0f 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -106,6 +106,8 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); +int lwtunnel_valid_encap_type(u16 encap_type); +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len); int lwtunnel_build_state(struct net_device *dev, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, @@ -169,6 +171,15 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, return -EOPNOTSUPP; } +static inline int lwtunnel_valid_encap_type(u16 encap_type) +{ + return -EOPNOTSUPP; +} +static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) +{ + return -EOPNOTSUPP; +} + static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index e5f84c26ba1a..2f6db386624e 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_MODULES @@ -110,25 +111,74 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); + if (likely(ops && ops->build_state)) + ret = ops->build_state(dev, encap, family, cfg, lws); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_build_state); + +int lwtunnel_valid_encap_type(u16 encap_type) +{ + const struct lwtunnel_encap_ops *ops; + int ret = -EINVAL; + + if (encap_type == LWTUNNEL_ENCAP_NONE || + encap_type > LWTUNNEL_ENCAP_MAX) + return ret; + + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); + rcu_read_unlock(); #ifdef CONFIG_MODULES if (!ops) { const char *encap_type_str = lwtunnel_encap_str(encap_type); if (encap_type_str) { - rcu_read_unlock(); + __rtnl_unlock(); request_module("rtnl-lwt-%s", encap_type_str); + rtnl_lock(); + rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); + rcu_read_unlock(); } } #endif - if (likely(ops && ops->build_state)) - ret = ops->build_state(dev, encap, family, cfg, lws); - rcu_read_unlock(); + return ops ? 0 : -EOPNOTSUPP; +} +EXPORT_SYMBOL(lwtunnel_valid_encap_type); - return ret; +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) +{ + struct rtnexthop *rtnh = (struct rtnexthop *)attr; + struct nlattr *nla_entype; + struct nlattr *attrs; + struct nlattr *nla; + u16 encap_type; + int attrlen; + + while (rtnh_ok(rtnh, remaining)) { + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + attrs = rtnh_attrs(rtnh); + nla = nla_find(attrs, attrlen, RTA_ENCAP); + nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); + + if (nla_entype) { + encap_type = nla_get_u16(nla_entype); + + if (lwtunnel_valid_encap_type(encap_type) != 0) + return -EOPNOTSUPP; + } + } + rtnh = rtnh_next(rtnh, &remaining); + } + + return 0; } -EXPORT_SYMBOL(lwtunnel_build_state); +EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3e4f183fc241..5b03d7f3b255 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -676,6 +677,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_mx_len = nla_len(attr); break; case RTA_MULTIPATH: + err = lwtunnel_valid_encap_type_attr(nla_data(attr), + nla_len(attr)); + if (err < 0) + goto errout; cfg->fc_mp = nla_data(attr); cfg->fc_mp_len = nla_len(attr); break; @@ -690,6 +695,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, break; case RTA_ENCAP_TYPE: cfg->fc_encap_type = nla_get_u16(attr); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; break; } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index acd8023763cb..bff4460f17be 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2885,6 +2885,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_MULTIPATH]) { cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + + err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, + cfg->fc_mp_len); + if (err < 0) + goto errout; } if (tb[RTA_PREF]) { @@ -2898,9 +2903,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP]) cfg->fc_encap = tb[RTA_ENCAP]; - if (tb[RTA_ENCAP_TYPE]) + if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type); + if (err < 0) + goto errout; + } + if (tb[RTA_EXPIRES]) { unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); -- GitLab From 79453ab8816401626be91070387ec751f508219b Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 19 Jan 2017 16:26:21 +0800 Subject: [PATCH 1338/1442] ipv6: addrconf: Avoid addrconf_disable_change() using RCU read-side lock [ Upstream commit 03e4deff4987f79c34112c5ba4eb195d4f9382b0 ] Just like commit 4acd4945cd1e ("ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock"), it is unnecessary to make addrconf_disable_change() use RCU iteration over the netdev list, since it already holds the RTNL lock, or we may meet Illegal context switch in RCU read-side critical section. Signed-off-by: Kefeng Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4bc5ba3ae452..95dfcba38ff6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5515,8 +5515,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -5525,7 +5524,6 @@ static void addrconf_disable_change(struct net *net, __s32 newf) dev_disable_change(idev); } } - rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) -- GitLab From 0c687a735cdb1d7670097ab12e505a14fdec55ca Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 19 Jan 2017 16:36:39 +0300 Subject: [PATCH 1339/1442] tcp: initialize max window for a new fastopen socket [ Upstream commit 0dbd7ff3ac5017a46033a9d0a87a8267d69119d9 ] Found that if we run LTP netstress test with large MSS (65K), the first attempt from server to send data comparable to this MSS on fastopen connection will be delayed by the probe timer. Here is an example: < S seq 0:0 win 43690 options [mss 65495 wscale 7 tfo cookie] length 32 > S. seq 0:0 ack 1 win 43690 options [mss 65495 wscale 7] length 0 < . ack 1 win 342 length 0 Inside tcp_sendmsg(), tcp_send_mss() returns max MSS in 'mss_now', as well as in 'size_goal'. This results the segment not queued for transmition until all the data copied from user buffer. Then, inside __tcp_push_pending_frames(), it breaks on send window test and continues with the check probe timer. Fragmentation occurs in tcp_write_wakeup()... +0.2 > P. seq 1:43777 ack 1 win 342 length 43776 < . ack 43777, win 1365 length 0 > P. seq 43777:65001 ack 1 win 342 options [...] length 21224 ... This also contradicts with the fact that we should bound to the half of the window if it is large. Fix this flaw by correctly initializing max_window. Before that, it could have large values that affect further calculations of 'size_goal'. Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Signed-off-by: Alexey Kodanev Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_fastopen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f51919535ca7..dd2560c83a85 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -205,6 +205,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, * scaled. So correct it appropriately. */ tp->snd_wnd = ntohs(tcp_hdr(skb)->window); + tp->max_window = tp->snd_wnd; /* Activate the retrans timer so that SYNACK can be retransmitted. * The request socket is not added to the ehash -- GitLab From 087dced61adcd8275de2586aa1d9f51c3887c1bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jan 2017 23:03:08 -0800 Subject: [PATCH 1340/1442] net/mlx5e: Do not recycle pages from emergency reserve [ Upstream commit e048fc50d7bde23136e098e04a324d7e3404408d ] A driver using dev_alloc_page() must not reuse a page allocated from emergency memory reserve. Otherwise all packets using this page will be immediately dropped, unless for very specific sockets having SOCK_MEMALLOC bit set. This issue might be hard to debug, because only a fraction of received packets would be dropped. Fixes: 4415a0319f92 ("net/mlx5e: Implement RX mapped page cache for page recycle") Signed-off-by: Eric Dumazet Cc: Tariq Toukan Cc: Saeed Mahameed Acked-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 33495d88aeb2..e7b2158bb48a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -193,6 +193,9 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } + if (unlikely(page_is_pfmemalloc(dma_info->page))) + return false; + cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; return true; -- GitLab From 74423145d931f60cde223cc8a84a1ad3bedd772a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 20 Jan 2017 18:12:17 +0100 Subject: [PATCH 1341/1442] bridge: netlink: call br_changelink() during br_dev_newlink() [ Upstream commit b6677449dff674cf5b81429b11d5c7f358852ef9 ] Any bridge options specified during link creation (e.g. ip link add) are ignored as br_dev_newlink() does not process them. Use br_changelink() to do it. Fixes: 133235161721 ("bridge: implement rtnl_link_ops->changelink") Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_netlink.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e99037c6f7b7..04741064a173 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -781,20 +781,6 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int br_dev_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_bridge *br = netdev_priv(dev); - - if (tb[IFLA_ADDRESS]) { - spin_lock_bh(&br->lock); - br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); - spin_unlock_bh(&br->lock); - } - - return register_netdevice(dev); -} - static int br_port_slave_changelink(struct net_device *brdev, struct net_device *dev, struct nlattr *tb[], @@ -1093,6 +1079,25 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + int err; + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + err = br_changelink(dev, tb, data); + if (err) + return err; + + return register_netdevice(dev); +} + static size_t br_get_size(const struct net_device *brdev) { return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ -- GitLab From ad864d9fce0ec56cc8f6afe5c6a0e6d7f484b9eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 20 Jan 2017 12:58:34 -0800 Subject: [PATCH 1342/1442] net: mpls: Fix multipath selection for LSR use case [ Upstream commit 9f427a0e474a67b454420c131709600d44850486 ] MPLS multipath for LSR is broken -- always selecting the first nexthop in the one label case. For example: $ ip -f mpls ro ls 100 nexthop as to 200 via inet 172.16.2.2 dev virt12 nexthop as to 300 via inet 172.16.3.2 dev virt13 101 nexthop as to 201 via inet6 2000:2::2 dev virt12 nexthop as to 301 via inet6 2000:3::2 dev virt13 In this example incoming packets have a single MPLS labels which means BOS bit is set. The BOS bit is passed from mpls_forward down to mpls_multipath_hash which never processes the hash loop because BOS is 1. Update mpls_multipath_hash to process the entire label stack. mpls_hdr_len tracks the total mpls header length on each pass (on pass N mpls_hdr_len is N * sizeof(mpls_shim_hdr)). When the label is found with the BOS set it verifies the skb has sufficient header for ipv4 or ipv6, and find the IPv4 and IPv6 header by using the last mpls_hdr pointer and adding 1 to advance past it. With these changes I have verified the code correctly sees the label, BOS, IPv4 and IPv6 addresses in the network header and icmp/tcp/udp traffic for ipv4 and ipv6 are distributed across the nexthops. Fixes: 1c78efa8319ca ("mpls: flow-based multipath selection") Acked-by: Robert Shearman Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mpls/af_mpls.c | 48 ++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 15fe97644ffe..5b77377e5a15 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -98,18 +98,19 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static u32 mpls_multipath_hash(struct mpls_route *rt, - struct sk_buff *skb, bool bos) +static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) { struct mpls_entry_decoded dec; + unsigned int mpls_hdr_len = 0; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; u32 hash = 0; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; + for (label_index = 0; label_index < MAX_MP_SELECT_LABELS; label_index++) { - if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) + mpls_hdr_len += sizeof(*hdr); + if (!pskb_may_pull(skb, mpls_hdr_len)) break; /* Read and decode the current label */ @@ -134,37 +135,38 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, eli_seen = true; } - bos = dec.bos; - if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + - sizeof(struct iphdr))) { + if (!dec.bos) + continue; + + /* found bottom label; does skb have room for a header? */ + if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) { const struct iphdr *v4hdr; - v4hdr = (const struct iphdr *)(mpls_hdr(skb) + - label_index); + v4hdr = (const struct iphdr *)(hdr + 1); if (v4hdr->version == 4) { hash = jhash_3words(ntohl(v4hdr->saddr), ntohl(v4hdr->daddr), v4hdr->protocol, hash); } else if (v4hdr->version == 6 && - pskb_may_pull(skb, sizeof(*hdr) * label_index + - sizeof(struct ipv6hdr))) { + pskb_may_pull(skb, mpls_hdr_len + + sizeof(struct ipv6hdr))) { const struct ipv6hdr *v6hdr; - v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + - label_index); - + v6hdr = (const struct ipv6hdr *)(hdr + 1); hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); hash = jhash_1word(v6hdr->nexthdr, hash); } } + + break; } return hash; } static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) + struct sk_buff *skb) { int alive = ACCESS_ONCE(rt->rt_nhn_alive); u32 hash = 0; @@ -180,7 +182,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, if (alive <= 0) return NULL; - hash = mpls_multipath_hash(rt, skb, bos); + hash = mpls_multipath_hash(rt, skb); nh_index = hash % alive; if (alive == rt->rt_nhn) goto out; @@ -278,17 +280,11 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, hdr = mpls_hdr(skb); dec = mpls_entry_decode(hdr); - /* Pop the label */ - skb_pull(skb, sizeof(*hdr)); - skb_reset_network_header(skb); - - skb_orphan(skb); - rt = mpls_route_input_rcu(net, dec.label); if (!rt) goto drop; - nh = mpls_select_multipath(rt, skb, dec.bos); + nh = mpls_select_multipath(rt, skb); if (!nh) goto drop; @@ -297,6 +293,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (!mpls_output_possible(out_dev)) goto drop; + /* Pop the label */ + skb_pull(skb, sizeof(*hdr)); + skb_reset_network_header(skb); + + skb_orphan(skb); + if (skb_warn_if_lro(skb)) goto drop; -- GitLab From 37b27b20a8a948ce5dfbd6e47272fd59e8bd0232 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Mon, 23 Jan 2017 14:18:43 +0800 Subject: [PATCH 1343/1442] r8152: don't execute runtime suspend if the tx is not empty [ Upstream commit 6a0b76c04ec157c88ca943debf78a8ee58469f2d ] Runtime suspend shouldn't be executed if the tx queue is not empty, because the device is not idle. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/r8152.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index be4c86faba4f..90b426c5ffce 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "6" +#define NET_VERSION "7" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -3572,6 +3572,8 @@ static bool delay_autosuspend(struct r8152 *tp) */ if (!sw_linking && tp->rtl_ops.in_nway(tp)) return true; + else if (!skb_queue_empty(&tp->tx_queue)) + return true; else return false; } -- GitLab From 93ff5e03bcba0761055491dc6bf52b1e0e33bbe6 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 23 Jan 2017 11:17:35 -0800 Subject: [PATCH 1344/1442] af_unix: move unix_mknod() out of bindlock [ Upstream commit 0fb44559ffd67de8517098b81f675fa0210f13f0 ] Dmitry reported a deadlock scenario: unix_bind() path: u->bindlock ==> sb_writer do_splice() path: sb_writer ==> pipe->mutex ==> u->bindlock In the unix_bind() code path, unix_mknod() does not have to be done with u->bindlock held, since it is a pure fs operation, so we can just move unix_mknod() out. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Rainer Weikusat Cc: Al Viro Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2358f2690ec5..2d03d5bcb5b9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -995,6 +995,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned int hash; struct unix_address *addr; struct hlist_head *list; + struct path path = { NULL, NULL }; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) @@ -1010,9 +1011,20 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; + if (sun_path[0]) { + umode_t mode = S_IFSOCK | + (SOCK_INODE(sock)->i_mode & ~current_umask()); + err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; + goto out; + } + } + err = mutex_lock_interruptible(&u->bindlock); if (err) - goto out; + goto out_put; err = -EINVAL; if (u->addr) @@ -1029,16 +1041,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - struct path path; - umode_t mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(sun_path, mode, &path); - if (err) { - if (err == -EEXIST) - err = -EADDRINUSE; - unix_release_addr(addr); - goto out_up; - } addr->hash = UNIX_HASH_SIZE; hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); @@ -1065,6 +1067,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->bindlock); +out_put: + if (err) + path_put(&path); out: return err; } -- GitLab From 087c2ecb21afddaeaa9105ac7df6015eb1e2ce6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 24 Jan 2017 10:45:38 +0100 Subject: [PATCH 1345/1442] qmi_wwan/cdc_ether: add device ID for HP lt2523 (Novatel E371) WWAN card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5b9f57516337b523f7466a53939aaaea7b78141b ] Another rebranded Novatel E371. qmi_wwan should drive this device, while cdc_ether should ignore it. Even though the USB descriptors are plain CDC-ETHER that USB interface is a QMI interface. Ref commit 7fdb7846c9ca ("qmi_wwan/cdc_ether: add device IDs for Dell 5804 (Novatel E371) WWAN card") Cc: Dan Williams Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ether.c | 8 ++++++++ drivers/net/usb/qmi_wwan.c | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index dd623f674487..b82be816256c 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -531,6 +531,7 @@ static const struct driver_info wwan_info = { #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef #define NVIDIA_VENDOR_ID 0x0955 +#define HP_VENDOR_ID 0x03f0 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -677,6 +678,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* HP lt2523 (Novatel E371) - handled by qmi_wwan */ +{ + USB_DEVICE_AND_INTERFACE_INFO(HP_VENDOR_ID, 0x421d, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* AnyDATA ADU960S - handled by qmi_wwan */ { USB_DEVICE_AND_INTERFACE_INFO(0x16d5, 0x650a, USB_CLASS_COMM, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6fe1cdb0174f..24d5272cdce5 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -654,6 +654,13 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* HP lt2523 (Novatel E371) */ + USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&qmi_wwan_info, + }, { /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, -- GitLab From 89c2588627c20cf9d791a9bb1523646b101a59b1 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Tue, 24 Jan 2017 16:26:47 +0000 Subject: [PATCH 1346/1442] net: Specify the owning module for lwtunnel ops [ Upstream commit 88ff7334f25909802140e690c0e16433e485b0a0 ] Modules implementing lwtunnel ops should not be allowed to unload while there is state alive using those ops, so specify the owning module for all lwtunnel ops. Signed-off-by: Robert Shearman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/lwtunnel.h | 2 ++ net/ipv4/ip_tunnel_core.c | 2 ++ net/ipv6/ila/ila_lwt.c | 1 + net/mpls/mpls_iptunnel.c | 1 + 4 files changed, 6 insertions(+) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 95581ca69f0f..3e0a7a942a0e 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -43,6 +43,8 @@ struct lwtunnel_encap_ops { int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); int (*xmit)(struct sk_buff *skb); + + struct module *owner; }; #ifdef CONFIG_LWTUNNEL diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index fed3d29f9eb3..0fd1976ab63b 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -313,6 +313,7 @@ static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { .fill_encap = ip_tun_fill_encap_info, .get_encap_size = ip_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, }; static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { @@ -403,6 +404,7 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = { .fill_encap = ip6_tun_fill_encap_info, .get_encap_size = ip6_tun_encap_nlsize, .cmp_encap = ip_tun_cmp_encap, + .owner = THIS_MODULE, }; void __init ip_tunnel_core_init(void) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index e50c27a93e17..f3db364fc853 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -164,6 +164,7 @@ static const struct lwtunnel_encap_ops ila_encap_ops = { .fill_encap = ila_fill_encap_info, .get_encap_size = ila_encap_nlsize, .cmp_encap = ila_encap_cmp, + .owner = THIS_MODULE, }; int ila_lwt_init(void) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index cf52cf30ac4b..bc9aaf58c7cc 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -218,6 +218,7 @@ static const struct lwtunnel_encap_ops mpls_iptun_ops = { .fill_encap = mpls_fill_encap_info, .get_encap_size = mpls_encap_nlsize, .cmp_encap = mpls_encap_cmp, + .owner = THIS_MODULE, }; static int __init mpls_iptunnel_init(void) -- GitLab From e972cce0c833fa990622a2f46db79979ab07485c Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Tue, 24 Jan 2017 16:26:48 +0000 Subject: [PATCH 1347/1442] lwtunnel: Fix oops on state free after encap module unload [ Upstream commit 85c814016ce3b371016c2c054a905fa2492f5a65 ] When attempting to free lwtunnel state after the module for the encap has been unloaded an oops occurs: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: lwtstate_free+0x18/0x40 [..] task: ffff88003e372380 task.stack: ffffc900001fc000 RIP: 0010:lwtstate_free+0x18/0x40 RSP: 0018:ffff88003fd83e88 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88002bbb3380 RCX: ffff88000c91a300 [..] Call Trace: free_fib_info_rcu+0x195/0x1a0 ? rt_fibinfo_free+0x50/0x50 rcu_process_callbacks+0x2d3/0x850 ? rcu_process_callbacks+0x296/0x850 __do_softirq+0xe4/0x4cb irq_exit+0xb0/0xc0 smp_apic_timer_interrupt+0x3d/0x50 apic_timer_interrupt+0x93/0xa0 [..] Code: e8 6e c6 fc ff 89 d8 5b 5d c3 bb de ff ff ff eb f4 66 90 66 66 66 66 90 55 48 89 e5 53 0f b7 07 48 89 fb 48 8b 04 c5 00 81 d5 81 <48> 8b 40 08 48 85 c0 74 13 ff d0 48 8d 7b 20 be 20 00 00 00 e8 The problem is after the module for the encap can be unloaded the corresponding ops is removed and is thus NULL here. Modules implementing lwtunnel ops should not be allowed to unload while there is state alive using those ops, so grab the module reference for the ops on creating lwtunnel state and of course release the reference when freeing the state. Fixes: 1104d9ba443a ("lwtunnel: Add destroy state operation") Signed-off-by: Robert Shearman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/lwtunnel.h | 5 +---- net/core/lwtunnel.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 3e0a7a942a0e..fc7c0dbdd1ff 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -48,10 +48,7 @@ struct lwtunnel_encap_ops { }; #ifdef CONFIG_LWTUNNEL -static inline void lwtstate_free(struct lwtunnel_state *lws) -{ - kfree(lws); -} +void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 2f6db386624e..afa64f086d87 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -66,6 +66,15 @@ EXPORT_SYMBOL(lwtunnel_state_alloc); static const struct lwtunnel_encap_ops __rcu * lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; +void lwtstate_free(struct lwtunnel_state *lws) +{ + const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; + + kfree(lws); + module_put(ops->owner); +} +EXPORT_SYMBOL(lwtstate_free); + int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, unsigned int num) { @@ -111,8 +120,11 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[encap_type]); - if (likely(ops && ops->build_state)) + if (likely(ops && ops->build_state && try_module_get(ops->owner))) { ret = ops->build_state(dev, encap, family, cfg, lws); + if (ret) + module_put(ops->owner); + } rcu_read_unlock(); return ret; -- GitLab From 9f42bc4f9c1c968ce7f87b451544a119600e3666 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 25 Jan 2017 09:10:41 -0800 Subject: [PATCH 1348/1442] net: dsa: Bring back device detaching in dsa_slave_suspend() [ Upstream commit f154be241d22298d2b63c9b613f619fa1086ea75 ] Commit 448b4482c671 ("net: dsa: Add lockdep class to tx queues to avoid lockdep splat") removed the netif_device_detach() call done in dsa_slave_suspend() which is necessary, and paired with a corresponding netif_device_attach(), bring it back. Fixes: 448b4482c671 ("net: dsa: Add lockdep class to tx queues to avoid lockdep splat") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/slave.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 30e2e21d7619..3ff9d97cf56b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1201,6 +1201,8 @@ int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); + netif_device_detach(slave_dev); + if (p->phy) { phy_stop(p->phy); p->old_pause = -1; -- GitLab From d20e4ad06c4be123ef350a0ca78f1480bbdc5f8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Feb 2017 08:55:53 +0100 Subject: [PATCH 1349/1442] xfs: bump up reserved blocks in xfs_alloc_set_aside commit 5149fd327f16e393c1d04fa5325ab072c32472bf upstream. Setting aside 4 blocks globally for bmbt splits isn't all that useful, as different threads can allocate space in parallel. Bump it to 4 blocks per AG to allow each thread that is currently doing an allocation to dip into it separately. Without that we may no have enough reserved blocks if there are enough parallel transactions in an almost out space file system that all run into bmap btree splits. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 5050056a0b06..0a46f8488b8d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -95,10 +95,7 @@ unsigned int xfs_alloc_set_aside( struct xfs_mount *mp) { - unsigned int blocks; - - blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); - return blocks; + return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); } /* -- GitLab From c63f4d3aa09d0b8c36836a887d32cf20a974509f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Feb 2017 08:55:54 +0100 Subject: [PATCH 1350/1442] xfs: fix bogus minleft manipulations commit 255c516278175a6dc7037d1406307f35237d8688 upstream. We can't just set minleft to 0 when we're low on space - that's exactly what we need minleft for: to protect space in the AG for btree block allocations when we are low on free space. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 24 +++++++----------------- fs/xfs/libxfs/xfs_bmap.c | 3 --- fs/xfs/libxfs/xfs_bmap_btree.c | 3 +-- 3 files changed, 8 insertions(+), 22 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 0a46f8488b8d..fe925702c955 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2635,12 +2635,10 @@ xfs_alloc_vextent( xfs_agblock_t agsize; /* allocation group size */ int error; int flags; /* XFS_ALLOC_FLAG_... locking flags */ - xfs_extlen_t minleft;/* minimum left value, temp copy */ xfs_mount_t *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ xfs_alloctype_t type; /* input allocation type */ int bump_rotor = 0; - int no_min = 0; xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ mp = args->mp; @@ -2669,7 +2667,6 @@ xfs_alloc_vextent( trace_xfs_alloc_vextent_badargs(args); return 0; } - minleft = args->minleft; switch (type) { case XFS_ALLOCTYPE_THIS_AG: @@ -2680,9 +2677,7 @@ xfs_alloc_vextent( */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); args->pag = xfs_perag_get(mp, args->agno); - args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2747,9 +2742,7 @@ xfs_alloc_vextent( */ for (;;) { args->pag = xfs_perag_get(mp, args->agno); - if (no_min) args->minleft = 0; error = xfs_alloc_fix_freelist(args, flags); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2789,20 +2782,17 @@ xfs_alloc_vextent( * or switch to non-trylock mode. */ if (args->agno == sagno) { - if (no_min == 1) { + if (flags == 0) { args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_allfailed(args); break; } - if (flags == 0) { - no_min = 1; - } else { - flags = 0; - if (type == XFS_ALLOCTYPE_START_BNO) { - args->agbno = XFS_FSB_TO_AGBNO(mp, - args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - } + + flags = 0; + if (type == XFS_ALLOCTYPE_START_BNO) { + args->agbno = XFS_FSB_TO_AGBNO(mp, + args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; } } xfs_perag_put(args->pag); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 89d727b659fc..52dc5c175001 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3903,7 +3903,6 @@ xfs_bmap_btalloc( args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; args.total = ap->minlen; - args.minleft = 0; if ((error = xfs_alloc_vextent(&args))) return error; ap->dfops->dop_low = true; @@ -4437,8 +4436,6 @@ xfs_bmapi_allocate( if (error) return error; - if (bma->dfops->dop_low) - bma->minleft = 0; if (bma->cur) bma->cur->bc_private.b.firstblock = *bma->firstblock; if (bma->blkno == NULLFSBLOCK) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 049fa597ae91..f76c1693ff01 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -502,12 +502,11 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK && args.minleft) { /* * Could not find an AG with enough free space to satisfy - * a full btree split. Try again without minleft and if + * a full btree split. Try again and if * successful activate the lowspace algorithm. */ args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; - args.minleft = 0; error = xfs_alloc_vextent(&args); if (error) goto error0; -- GitLab From 6b81365b1e5795578c4aad59a37c15e958cdcdfe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Feb 2017 08:55:55 +0100 Subject: [PATCH 1351/1442] xfs: adjust allocation length in xfs_alloc_space_available commit 54fee133ad59c87ab01dd84ab3e9397134b32acb upstream. We must decide in xfs_alloc_fix_freelist if we can perform an allocation from a given AG is possible or not based on the available space, and should not fail the allocation past that point on a healthy file system. But currently we have two additional places that second-guess xfs_alloc_fix_freelist: xfs_alloc_ag_vextent tries to adjust the maxlen parameter to remove the reservation before doing the allocation (but ignores the various minium freespace requirements), and xfs_alloc_fix_minleft tries to fix up the allocated length after we've found an extent, but ignores the reservations and also doesn't take the AGFL into account (and thus fails allocations for not matching minlen in some cases). Remove all these later fixups and just correct the maxlen argument inside xfs_alloc_fix_freelist once we have the AGF buffer locked. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 81 ++++++++------------------------------- fs/xfs/libxfs/xfs_alloc.h | 2 +- 2 files changed, 18 insertions(+), 65 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index fe925702c955..f2e7eb6e5243 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -362,35 +362,11 @@ xfs_alloc_fix_len( return; ASSERT(rlen >= args->minlen && rlen <= args->maxlen); ASSERT(rlen % args->prod == args->mod); + ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= + rlen + args->minleft); args->len = rlen; } -/* - * Fix up length if there is too little space left in the a.g. - * Return 1 if ok, 0 if too little, should give up. - */ -STATIC int -xfs_alloc_fix_minleft( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_agf_t *agf; /* a.g. freelist header */ - int diff; /* free space difference */ - - if (args->minleft == 0) - return 1; - agf = XFS_BUF_TO_AGF(args->agbp); - diff = be32_to_cpu(agf->agf_freeblks) - - args->len - args->minleft; - if (diff >= 0) - return 1; - args->len += diff; /* shrink the allocated space */ - /* casts to (int) catch length underflows */ - if ((int)args->len >= (int)args->minlen) - return 1; - args->agbno = NULLAGBLOCK; - return 0; -} - /* * Update the two btrees, logically removing from freespace the extent * starting at rbno, rlen blocks. The extent is contained within the @@ -686,8 +662,6 @@ xfs_alloc_ag_vextent( xfs_alloc_arg_t *args) /* argument structure for allocation */ { int error=0; - xfs_extlen_t reservation; - xfs_extlen_t oldmax; ASSERT(args->minlen > 0); ASSERT(args->maxlen > 0); @@ -695,20 +669,6 @@ xfs_alloc_ag_vextent( ASSERT(args->mod < args->prod); ASSERT(args->alignment > 0); - /* - * Clamp maxlen to the amount of free space minus any reservations - * that have been made. - */ - oldmax = args->maxlen; - reservation = xfs_ag_resv_needed(args->pag, args->resv); - if (args->maxlen > args->pag->pagf_freeblks - reservation) - args->maxlen = args->pag->pagf_freeblks - reservation; - if (args->maxlen == 0) { - args->agbno = NULLAGBLOCK; - args->maxlen = oldmax; - return 0; - } - /* * Branch to correct routine based on the type. */ @@ -728,8 +688,6 @@ xfs_alloc_ag_vextent( /* NOTREACHED */ } - args->maxlen = oldmax; - if (error || args->agbno == NULLAGBLOCK) return error; @@ -838,9 +796,6 @@ xfs_alloc_ag_vextent_exact( args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - args->agbno; xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto not_found; - ASSERT(args->agbno + args->len <= tend); /* @@ -1146,12 +1101,7 @@ xfs_alloc_ag_vextent_near( XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; - if (!xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_near_nominleft(args); - return 0; - } - blen = args->len; + /* * We are allocating starting at bnew for blen blocks. */ @@ -1343,12 +1293,6 @@ xfs_alloc_ag_vextent_near( */ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) { - trace_xfs_alloc_near_nominleft(args); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - return 0; - } rlen = args->len; (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, args->datatype, ltbnoa, ltlena, <new); @@ -1550,8 +1494,6 @@ xfs_alloc_ag_vextent_size( } xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); /* @@ -2070,10 +2012,20 @@ xfs_alloc_space_available( /* do we have enough free space remaining for the allocation? */ available = (int)(pag->pagf_freeblks + pag->pagf_flcount - - reservation - min_free - args->total); - if (available < (int)args->minleft || available <= 0) + reservation - min_free - args->minleft); + if (available < (int)args->total) return false; + /* + * Clamp maxlen to the amount of free space available for the actual + * extent allocation. + */ + if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { + args->maxlen = available; + ASSERT(args->maxlen > 0); + ASSERT(args->maxlen >= args->minlen); + } + return true; } @@ -2119,7 +2071,8 @@ xfs_alloc_fix_freelist( } need = xfs_alloc_min_freelist(mp, pag); - if (!xfs_alloc_space_available(args, need, flags)) + if (!xfs_alloc_space_available(args, need, flags | + XFS_ALLOC_FLAG_CHECK)) goto out_agbp_relse; /* diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7c404a6b0ae3..1d0f48a501a3 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -56,7 +56,7 @@ typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ #define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ #define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ - +#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ /* * Argument structure for xfs_alloc routines. -- GitLab From e9b77651910722cd74a3936f230366503089bc80 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Feb 2017 08:55:56 +0100 Subject: [PATCH 1352/1442] xfs: don't rely on ->total in xfs_alloc_space_available commit 12ef830198b0d71668eb9b59f9ba69d32951a48a upstream. ->total is a bit of an odd parameter passed down to the low-level allocator all the way from the high-level callers. It's supposed to contain the maximum number of blocks to be allocated for the whole transaction [1]. But in xfs_iomap_write_allocate we only convert existing delayed allocations and thus only have a minimal block reservation for the current transaction, so xfs_alloc_space_available can't use it for the allocation decisions. Use the maximum of args->total and the calculated block requirement to make a decision. We probably should get rid of args->total eventually and instead apply ->minleft more broadly, but that will require some extensive changes all over. [1] which creates lots of confusion as most callers don't decrement it once doing a first allocation. But that's for a separate series. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index f2e7eb6e5243..9f06a211e157 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1995,7 +1995,7 @@ xfs_alloc_space_available( int flags) { struct xfs_perag *pag = args->pag; - xfs_extlen_t longest; + xfs_extlen_t alloc_len, longest; xfs_extlen_t reservation; /* blocks that are still reserved */ int available; @@ -2005,15 +2005,16 @@ xfs_alloc_space_available( reservation = xfs_ag_resv_needed(pag, args->resv); /* do we have enough contiguous free space for the allocation? */ + alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, reservation); - if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) + if (longest < alloc_len) return false; /* do we have enough free space remaining for the allocation? */ available = (int)(pag->pagf_freeblks + pag->pagf_flcount - reservation - min_free - args->minleft); - if (available < (int)args->total) + if (available < (int)max(args->total, alloc_len)) return false; /* -- GitLab From 4f4d5082ec73f6e0bd44a8eccbef440b96f46a98 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Feb 2017 08:55:57 +0100 Subject: [PATCH 1353/1442] xfs: don't print warnings when xfs_log_force fails commit 84a4620cfe97c9d57e39b2369bfb77faff55063d upstream. There are only two reasons for xfs_log_force / xfs_log_force_lsn to fail: one is an I/O error, for which xlog_bdstrat already logs a warning, and the second is an already shutdown log due to a previous I/O errors. In the latter case we'll already have a previous indication for the actual error, but the large stream of misleading warnings from xfs_log_force will probably scroll it out of the message buffer. Simply removing the warnings thus makes the XFS log reporting significantly better. Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 3b74fa011bb1..4017aa967331 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3324,12 +3324,8 @@ xfs_log_force( xfs_mount_t *mp, uint flags) { - int error; - trace_xfs_log_force(mp, 0, _RET_IP_); - error = _xfs_log_force(mp, flags, NULL); - if (error) - xfs_warn(mp, "%s: error %d returned.", __func__, error); + _xfs_log_force(mp, flags, NULL); } /* @@ -3473,12 +3469,8 @@ xfs_log_force_lsn( xfs_lsn_t lsn, uint flags) { - int error; - trace_xfs_log_force(mp, lsn, _RET_IP_); - error = _xfs_log_force_lsn(mp, lsn, flags, NULL); - if (error) - xfs_warn(mp, "%s: error %d returned.", __func__, error); + _xfs_log_force_lsn(mp, lsn, flags, NULL); } /* -- GitLab From 624e54b5aff15c6d49974f404c5cef5ce7f89cd0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 2 Feb 2017 08:55:58 +0100 Subject: [PATCH 1354/1442] xfs: make the ASSERT() condition likely commit bf46ecc3d8cca05f2907cf482755c42c2b11a79d upstream. The ASSERT() condition is the normal case, not the exception, so testing the condition should be likely(), not unlikely(). Reviewed-by: Christoph Hellwig Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_linux.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 68640fb63a54..1455b25205a8 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -330,11 +330,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) } #define ASSERT_ALWAYS(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) #ifdef DEBUG #define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) #ifndef STATIC # define STATIC noinline @@ -345,7 +345,7 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) #ifdef XFS_WARN #define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) #ifndef STATIC # define STATIC static noinline -- GitLab From e5325fcf70b1cd2c45503b854fde3f1bd44216da Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 2 Feb 2017 08:55:59 +0100 Subject: [PATCH 1355/1442] xfs: sanity check directory inode di_size commit 3c6f46eacd876bd723a9bad3c6882714c052fd8e upstream. This changes fixes an assertion hit when fuzzing on-disk i_mode values. The easy case to fix is when changing an empty file i_mode to S_IFDIR. In this case, xfs_dinode_verify() detects an illegal zero size for directory and fails to load the inode structure from disk. For the case of non empty file whose i_mode is changed to S_IFDIR, the ASSERT() statement in xfs_dir2_isblock() is replaced with return -EFSCORRUPTED, to avoid interacting with corrupted jusk also when XFS_DEBUG is disabled. Suggested-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Amir Goldstein Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_dir2.c | 3 ++- fs/xfs/libxfs/xfs_inode_buf.c | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 20a96dd5af7e..ec326d272efb 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -631,7 +631,8 @@ xfs_dir2_isblock( if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; - ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); + if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) + return -EFSCORRUPTED; *vp = rval; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index c906e50515f0..977245de8953 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -386,6 +386,7 @@ xfs_dinode_verify( struct xfs_inode *ip, struct xfs_dinode *dip) { + uint16_t mode; uint16_t flags; uint64_t flags2; @@ -396,8 +397,10 @@ xfs_dinode_verify( if (be64_to_cpu(dip->di_size) & (1ULL << 63)) return false; - /* No zero-length symlinks. */ - if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0) + mode = be16_to_cpu(dip->di_mode); + + /* No zero-length symlinks/dirs. */ + if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) return false; /* only version 3 or greater inodes are extensively verified here */ -- GitLab From 4fac84ba1da7aa62dea520dcedd4f6de117d8f2b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 2 Feb 2017 08:56:00 +0100 Subject: [PATCH 1356/1442] xfs: add missing include dependencies to xfs_dir2.h commit b597dd5373a1ccc08218665dc8417433b1c09550 upstream. xfs_dir2.h dereferences some data types in inline functions and fails to include those type definitions, e.g.: xfs_dir2_data_aoff_t, struct xfs_da_geometry. Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_dir2.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index becc926c3e3d..6a3fe31d4bc2 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -18,6 +18,9 @@ #ifndef __XFS_DIR2_H__ #define __XFS_DIR2_H__ +#include "xfs_da_format.h" +#include "xfs_da_btree.h" + struct xfs_defer_ops; struct xfs_da_args; struct xfs_inode; -- GitLab From b5f68e24cc7bc3492ebc5c70f3ef6babcbd4188b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 2 Feb 2017 08:56:01 +0100 Subject: [PATCH 1357/1442] xfs: replace xfs_mode_to_ftype table with switch statement commit 1fc4d33fed124fb182e8e6c214e973a29389ae83. The size of the xfs_mode_to_ftype[] conversion table was too small to handle an invalid value of mode=S_IFMT. Instead of fixing the table size, replace the conversion table with a conversion helper that uses a switch statement. Suggested-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Amir Goldstein Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_dir2.c | 36 ++++++++++++++++++++++-------------- fs/xfs/libxfs/xfs_dir2.h | 5 ++--- fs/xfs/xfs_iops.c | 2 +- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index ec326d272efb..1a978bd9d506 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -36,21 +36,29 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; /* - * @mode, if set, indicates that the type field needs to be set up. - * This uses the transformation from file mode to DT_* as defined in linux/fs.h - * for file type specification. This will be propagated into the directory - * structure if appropriate for the given operation and filesystem config. + * Convert inode mode to directory entry filetype */ -const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { - [0] = XFS_DIR3_FT_UNKNOWN, - [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, - [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, - [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, -}; +const unsigned char xfs_mode_to_ftype(int mode) +{ + switch (mode & S_IFMT) { + case S_IFREG: + return XFS_DIR3_FT_REG_FILE; + case S_IFDIR: + return XFS_DIR3_FT_DIR; + case S_IFCHR: + return XFS_DIR3_FT_CHRDEV; + case S_IFBLK: + return XFS_DIR3_FT_BLKDEV; + case S_IFIFO: + return XFS_DIR3_FT_FIFO; + case S_IFSOCK: + return XFS_DIR3_FT_SOCK; + case S_IFLNK: + return XFS_DIR3_FT_SYMLINK; + default: + return XFS_DIR3_FT_UNKNOWN; + } +} /* * ASCII case-insensitive (ie. A-Z) support for directories that was diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 6a3fe31d4bc2..0051a34b8c3f 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -35,10 +35,9 @@ struct xfs_dir2_data_unused; extern struct xfs_name xfs_name_dotdot; /* - * directory filetype conversion tables. + * Convert inode mode to directory entry filetype */ -#define S_SHIFT 12 -extern const unsigned char xfs_mode_to_ftype[]; +extern const unsigned char xfs_mode_to_ftype(int mode); /* * directory operations vector for encode/decode routines diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 405a65cd9d6b..1abe71918734 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -103,7 +103,7 @@ xfs_dentry_to_name( { namep->name = dentry->d_name.name; namep->len = dentry->d_name.len; - namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT]; + namep->type = xfs_mode_to_ftype(mode); } STATIC void -- GitLab From 43ce5921773b827531ed123016625481d43c2bd9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 2 Feb 2017 08:56:02 +0100 Subject: [PATCH 1358/1442] xfs: sanity check inode mode when creating new dentry commit fab8eef86c814c3dd46bc5d760b6e4a53d5fc5a6 upstream. The helper xfs_dentry_to_name() is used by 2 different classes of callers: Callers that pass zero mode and don't care about the returned name.type field and Callers that pass non zero mode and do care about the name.type field. Change xfs_dentry_to_name() to not take the mode argument and change the call sites of the first class to not pass the mode argument. Create a new helper xfs_dentry_mode_to_name() which does pass the mode argument and returns -EFSCORRUPTED if mode is invalid. Callers that translate non zero mode to on-disk file type now check the return value and will export the error to user instead of staging an invalid file type to be written to directory entry. Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_iops.c | 48 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1abe71918734..f5e0f608e245 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -97,6 +97,16 @@ xfs_init_security( static void xfs_dentry_to_name( + struct xfs_name *namep, + struct dentry *dentry) +{ + namep->name = dentry->d_name.name; + namep->len = dentry->d_name.len; + namep->type = XFS_DIR3_FT_UNKNOWN; +} + +static int +xfs_dentry_mode_to_name( struct xfs_name *namep, struct dentry *dentry, int mode) @@ -104,6 +114,11 @@ xfs_dentry_to_name( namep->name = dentry->d_name.name; namep->len = dentry->d_name.len; namep->type = xfs_mode_to_ftype(mode); + + if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN)) + return -EFSCORRUPTED; + + return 0; } STATIC void @@ -119,7 +134,7 @@ xfs_cleanup_inode( * xfs_init_security we must back out. * ENOSPC can hit here, among other things. */ - xfs_dentry_to_name(&teardown, dentry, 0); + xfs_dentry_to_name(&teardown, dentry); xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); } @@ -154,8 +169,12 @@ xfs_generic_create( if (error) return error; + /* Verify mode is valid also for tmpfile case */ + error = xfs_dentry_mode_to_name(&name, dentry, mode); + if (unlikely(error)) + goto out_free_acl; + if (!tmpfile) { - xfs_dentry_to_name(&name, dentry, mode); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); } else { error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); @@ -248,7 +267,7 @@ xfs_vn_lookup( if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - xfs_dentry_to_name(&name, dentry, 0); + xfs_dentry_to_name(&name, dentry); error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); if (unlikely(error)) { if (unlikely(error != -ENOENT)) @@ -275,7 +294,7 @@ xfs_vn_ci_lookup( if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - xfs_dentry_to_name(&xname, dentry, 0); + xfs_dentry_to_name(&xname, dentry); error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); if (unlikely(error)) { if (unlikely(error != -ENOENT)) @@ -310,7 +329,9 @@ xfs_vn_link( struct xfs_name name; int error; - xfs_dentry_to_name(&name, dentry, inode->i_mode); + error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode); + if (unlikely(error)) + return error; error = xfs_link(XFS_I(dir), XFS_I(inode), &name); if (unlikely(error)) @@ -329,7 +350,7 @@ xfs_vn_unlink( struct xfs_name name; int error; - xfs_dentry_to_name(&name, dentry, 0); + xfs_dentry_to_name(&name, dentry); error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry))); if (error) @@ -359,7 +380,9 @@ xfs_vn_symlink( mode = S_IFLNK | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); - xfs_dentry_to_name(&name, dentry, mode); + error = xfs_dentry_mode_to_name(&name, dentry, mode); + if (unlikely(error)) + goto out; error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) @@ -395,6 +418,7 @@ xfs_vn_rename( { struct inode *new_inode = d_inode(ndentry); int omode = 0; + int error; struct xfs_name oname; struct xfs_name nname; @@ -405,8 +429,14 @@ xfs_vn_rename( if (flags & RENAME_EXCHANGE) omode = d_inode(ndentry)->i_mode; - xfs_dentry_to_name(&oname, odentry, omode); - xfs_dentry_to_name(&nname, ndentry, d_inode(odentry)->i_mode); + error = xfs_dentry_mode_to_name(&oname, odentry, omode); + if (omode && unlikely(error)) + return error; + + error = xfs_dentry_mode_to_name(&nname, ndentry, + d_inode(odentry)->i_mode); + if (unlikely(error)) + return error; return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)), XFS_I(ndir), &nname, -- GitLab From d3201a14b36ae6ffb36b6b830e216f0ae4ebc184 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 2 Feb 2017 08:56:03 +0100 Subject: [PATCH 1359/1442] xfs: sanity check inode di_mode commit a324cbf10a3c67aaa10c9f47f7b5801562925bc2 upstream. Check for invalid file type in xfs_dinode_verify() and fail to load the inode structure from disk. Reviewed-by: Darrick J. Wong Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_inode_buf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 977245de8953..37ee7f01a35d 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -29,6 +29,7 @@ #include "xfs_icache.h" #include "xfs_trans.h" #include "xfs_ialloc.h" +#include "xfs_dir2.h" /* * Check that none of the inode's in the buffer have a next @@ -398,6 +399,8 @@ xfs_dinode_verify( return false; mode = be16_to_cpu(dip->di_mode); + if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) + return false; /* No zero-length symlinks/dirs. */ if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) -- GitLab From d062d90c35f292b3e6ee266746259e2e2950f940 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 2 Feb 2017 08:56:04 +0100 Subject: [PATCH 1360/1442] xfs: don't wrap ID in xfs_dq_get_next_id commit 657bdfb7f5e68ca5e2ed009ab473c429b0d6af85 upstream. The GETNEXTQOTA ioctl takes whatever ID is sent in, and looks for the next active quota for an user equal or higher to that ID. But if we are at the maximum ID and then ask for the "next" one, we may wrap back to zero. In this case, userspace may loop forever, because it will start querying again at zero. We'll fix this in userspace as well, but for the kernel, return -ENOENT if we ask for the next quota ID past UINT_MAX so the caller knows to stop. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_dquot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 7a30b8f11db7..9d06cc30e875 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -710,6 +710,10 @@ xfs_dq_get_next_id( /* Simple advance */ next_id = *id + 1; + /* If we'd wrap past the max ID, stop */ + if (next_id < *id) + return -ENOENT; + /* If new ID is within the current chunk, advancing it sufficed */ if (next_id % mp->m_quotainfo->qi_dqperchunk) { *id = next_id; -- GitLab From 29f319275e7637b3d146aa67db7fad036339fcc1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 08:56:05 +0100 Subject: [PATCH 1361/1442] xfs: fix xfs_mode_to_ftype() prototype commit fd29f7af75b7adf250beccffa63746c6a88e2b74 upstream. A harmless warning just got introduced: fs/xfs/libxfs/xfs_dir2.h:40:8: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers] Removing the 'const' modifier avoids the warning and has no other effect. Fixes: 1fc4d33fed12 ("xfs: replace xfs_mode_to_ftype table with switch statement") Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_dir2.c | 2 +- fs/xfs/libxfs/xfs_dir2.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 1a978bd9d506..7825d78d4587 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -38,7 +38,7 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; /* * Convert inode mode to directory entry filetype */ -const unsigned char xfs_mode_to_ftype(int mode) +unsigned char xfs_mode_to_ftype(int mode) { switch (mode & S_IFMT) { case S_IFREG: diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 0051a34b8c3f..ae0d55bf6500 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -37,7 +37,7 @@ extern struct xfs_name xfs_name_dotdot; /* * Convert inode mode to directory entry filetype */ -extern const unsigned char xfs_mode_to_ftype(int mode); +extern unsigned char xfs_mode_to_ftype(int mode); /* * directory operations vector for encode/decode routines -- GitLab From 214d55efa25557ca4d023c4001d798b7d360cd8b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Feb 2017 08:56:06 +0100 Subject: [PATCH 1362/1442] xfs: fix COW writeback race commit d2b3964a0780d2d2994eba57f950d6c9fe489ed8 upstream. Due to the way how xfs_iomap_write_allocate tries to convert the whole found extents from delalloc to real space we can run into a race condition with multiple threads doing writes to this same extent. For the non-COW case that is harmless as the only thing that can happen is that we call xfs_bmapi_write on an extent that has already been converted to a real allocation. For COW writes where we move the extent from the COW to the data fork after I/O completion the race is, however, not quite as harmless. In the worst case we are now calling xfs_bmapi_write on a region that contains hole in the COW work, which will trip up an assert in debug builds or lead to file system corruption in non-debug builds. This seems to be reproducible with workloads of small O_DSYNC write, although so far I've not managed to come up with a with an isolated reproducer. The fix for the issue is relatively simple: tell xfs_bmapi_write that we are only asked to convert delayed allocations and skip holes in that case. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 44 +++++++++++++++++++++++++++++----------- fs/xfs/libxfs/xfs_bmap.h | 6 +++++- fs/xfs/xfs_iomap.c | 2 +- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 52dc5c175001..fbb60d30089c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4607,8 +4607,6 @@ xfs_bmapi_write( int n; /* current extent index */ xfs_fileoff_t obno; /* old block number (offset) */ int whichfork; /* data or attr fork */ - char inhole; /* current location is hole in file */ - char wasdelay; /* old extent was delayed */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4694,22 +4692,44 @@ xfs_bmapi_write( bma.firstblock = firstblock; while (bno < end && n < *nmap) { - inhole = eof || bma.got.br_startoff > bno; - wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); + bool need_alloc = false, wasdelay = false; - /* - * Make sure we only reflink into a hole. - */ - if (flags & XFS_BMAPI_REMAP) - ASSERT(inhole); - if (flags & XFS_BMAPI_COWFORK) - ASSERT(!inhole); + /* in hole or beyoned EOF? */ + if (eof || bma.got.br_startoff > bno) { + if (flags & XFS_BMAPI_DELALLOC) { + /* + * For the COW fork we can reasonably get a + * request for converting an extent that races + * with other threads already having converted + * part of it, as there converting COW to + * regular blocks is not protected using the + * IOLOCK. + */ + ASSERT(flags & XFS_BMAPI_COWFORK); + if (!(flags & XFS_BMAPI_COWFORK)) { + error = -EIO; + goto error0; + } + + if (eof || bno >= end) + break; + } else { + need_alloc = true; + } + } else { + /* + * Make sure we only reflink into a hole. + */ + ASSERT(!(flags & XFS_BMAPI_REMAP)); + if (isnullstartblock(bma.got.br_startblock)) + wasdelay = true; + } /* * First, deal with the hole before the allocated space * that we found, if any. */ - if (inhole || wasdelay) { + if (need_alloc || wasdelay) { bma.eof = eof; bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index d6d175a4fdec..e7d40b39f18f 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -110,6 +110,9 @@ struct xfs_extent_free_item /* Map something in the CoW fork. */ #define XFS_BMAPI_COWFORK 0x200 +/* Only convert delalloc space, don't allocate entirely new extents */ +#define XFS_BMAPI_DELALLOC 0x400 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -120,7 +123,8 @@ struct xfs_extent_free_item { XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \ - { XFS_BMAPI_COWFORK, "COWFORK" } + { XFS_BMAPI_COWFORK, "COWFORK" }, \ + { XFS_BMAPI_DELALLOC, "DELALLOC" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 15a83813b708..cdc6bdd495be 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -681,7 +681,7 @@ xfs_iomap_write_allocate( xfs_trans_t *tp; int nimaps; int error = 0; - int flags = 0; + int flags = XFS_BMAPI_DELALLOC; int nres; if (whichfork == XFS_COW_FORK) -- GitLab From 29094164ea999bfc48ef48780c1ae057afaafcb1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 08:56:07 +0100 Subject: [PATCH 1363/1442] xfs: verify dirblocklog correctly commit 83d230eb5c638949350f4761acdfc0af5cb1bc00 upstream. sb_dirblklog is added to sb_blocklog to compute the directory block size in bytes. Therefore, we must compare the sum of both those values against XFS_MAX_BLOCKSIZE_LOG, not just dirblklog. Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_sb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 2580262e4ea0..584ec896a533 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -242,7 +242,7 @@ xfs_mount_validate_sb( sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || - sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || -- GitLab From aab858dabd5ee4df1d8876449db2ef7868d1b5d0 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 2 Feb 2017 08:56:08 +0100 Subject: [PATCH 1364/1442] xfs: remove racy hasattr check from attr ops commit 5a93790d4e2df73e30c965ec6e49be82fc3ccfce upstream. xfs_attr_[get|remove]() have unlocked attribute fork checks to optimize away a lock cycle in cases where the fork does not exist or is otherwise empty. This check is not safe, however, because an attribute fork short form to extent format conversion includes a transient state that causes the xfs_inode_hasattr() check to fail. Specifically, xfs_attr_shortform_to_leaf() creates an empty extent format attribute fork and then adds the existing shortform attributes to it. This means that lookup of an existing xattr can spuriously return -ENOATTR when racing against a setxattr that causes the associated format conversion. This was originally reproduced by an untar on a particularly configured glusterfs volume, but can also be reproduced on demand with properly crafted xattr requests. The format conversion occurs under the exclusive ilock. xfs_attr_get() and xfs_attr_remove() already have the proper locking and checks further down in the functions to handle this situation correctly. Drop the unlocked checks to avoid the spurious failure and rely on the existing logic. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index af1ecb19121e..6622d46ddec3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -131,9 +131,6 @@ xfs_attr_get( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (!xfs_inode_hasattr(ip)) - return -ENOATTR; - error = xfs_attr_args_init(&args, ip, name, flags); if (error) return error; @@ -392,9 +389,6 @@ xfs_attr_remove( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - if (!xfs_inode_hasattr(dp)) - return -ENOATTR; - error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; -- GitLab From 29f96b7e9023929f8bd80b5e1f53d4e6db3c434f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Feb 2017 08:56:09 +0100 Subject: [PATCH 1365/1442] xfs: extsize hints are not unlikely in xfs_bmap_btalloc commit 493611ebd62673f39e2f52c2561182c558a21cb6 upstream. With COW files they are the hotpath, just like for files with the extent size hint attribute. We really shouldn't micro-manage anything but failure cases with unlikely. Additionally Arnd Bergmann recently reported that one of these two unlikely annotations causes link failures together with an upcoming kernel instrumentation patch, so let's get rid of it ASAP. Signed-off-by: Christoph Hellwig Reported-by: Arnd Bergmann Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index fbb60d30089c..f52fd63fce19 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3720,7 +3720,7 @@ xfs_bmap_btalloc( align = xfs_get_cowextsz_hint(ap->ip); else if (xfs_alloc_is_userdata(ap->datatype)) align = xfs_get_extsz_hint(ap->ip); - if (unlikely(align)) { + if (align) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, &ap->offset, &ap->length); @@ -3792,7 +3792,7 @@ xfs_bmap_btalloc( args.minlen = ap->minlen; } /* apply extent size hints if obtained earlier */ - if (unlikely(align)) { + if (align) { args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); -- GitLab From 5d44dd54bd57c6275d82d8912730c794fc8ec8ab Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 08:56:10 +0100 Subject: [PATCH 1366/1442] xfs: clear _XBF_PAGES from buffers when readahead page commit 2aa6ba7b5ad3189cc27f14540aa2f57f0ed8df4b upstream. If we try to allocate memory pages to back an xfs_buf that we're trying to read, it's possible that we'll be so short on memory that the page allocation fails. For a blocking read we'll just wait, but for readahead we simply dump all the pages we've collected so far. Unfortunately, after dumping the pages we neglect to clear the _XBF_PAGES state, which means that the subsequent call to xfs_buf_free thinks that b_pages still points to pages we own. It then double-frees the b_pages pages. This results in screaming about negative page refcounts from the memory manager, which xfs oughtn't be triggering. To reproduce this case, mount a filesystem where the size of the inodes far outweighs the availalble memory (a ~500M inode filesystem on a VM with 300MB memory did the trick here) and run bulkstat in parallel with other memory eating processes to put a huge load on the system. The "check summary" phase of xfs_scrub also works for this purpose. Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b5b9bffe3520..d7a67d7fbc7f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -423,6 +423,7 @@ xfs_buf_allocate_memory( out_free_pages: for (i = 0; i < bp->b_page_count; i++) __free_page(bp->b_pages[i]); + bp->b_flags &= ~_XBF_PAGES; return error; } -- GitLab From b5b4d4a9141e15ea8d887d88d9763cf190955907 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Feb 2017 08:56:11 +0100 Subject: [PATCH 1367/1442] xfs: fix bmv_count confusion w/ shared extents commit c364b6d0b6cda1cd5d9ab689489adda3e82529aa upstream. In a bmapx call, bmv_count is the total size of the array, including the zeroth element that userspace uses to supply the search key. The output array starts at offset 1 so that we can set up the user for the next invocation. Since we now can split an extent into multiple bmap records due to shared/unshared status, we have to be careful that we don't overflow the output array. In the original patch f86f403794b ("xfs: teach get_bmapx about shared extents and the CoW fork") I used cur_ext (the output index) to check for overflows, albeit with an off-by-one error. Since nexleft no longer describes the number of unfilled slots in the output, we can rip all that out and use cur_ext for the overflow check directly. Failure to do this causes heap corruption in bmapx callers such as xfs_io and xfs_scrub. xfs/328 can reproduce this problem. Reviewed-by: Eric Sandeen Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_bmap_util.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 0670a8bd5818..efb8ccd6bbf2 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -528,7 +528,6 @@ xfs_getbmap( xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_mount_t *mp; /* file system mount point */ int nex; /* # of user extents can do */ - int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ struct getbmapx *out; /* output structure */ @@ -686,10 +685,8 @@ xfs_getbmap( goto out_free_map; } - nexleft = nex; - do { - nmap = (nexleft > subnex) ? subnex : nexleft; + nmap = (nex> subnex) ? subnex : nex; error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), map, &nmap, bmapi_flags); @@ -697,8 +694,8 @@ xfs_getbmap( goto out_free_map; ASSERT(nmap <= subnex); - for (i = 0; i < nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count; i++) { + for (i = 0; i < nmap && bmv->bmv_length && + cur_ext < bmv->bmv_count - 1; i++) { out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; @@ -760,16 +757,27 @@ xfs_getbmap( continue; } + /* + * In order to report shared extents accurately, + * we report each distinct shared/unshared part + * of a single bmbt record using multiple bmap + * extents. To make that happen, we iterate the + * same map array item multiple times, each + * time trimming out the subextent that we just + * reported. + * + * Because of this, we must check the out array + * index (cur_ext) directly against bmv_count-1 + * to avoid overflows. + */ if (inject_map.br_startblock != NULLFSBLOCK) { map[i] = inject_map; i--; - } else - nexleft--; + } bmv->bmv_entries++; cur_ext++; } - } while (nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count); + } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); out_free_map: kmem_free(map); -- GitLab From c8ea2f3b8247b6046f02527127ecf2fd0b045f46 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 4 Feb 2017 09:47:29 +0100 Subject: [PATCH 1368/1442] Linux 4.9.8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index da704d903321..1130803ab93c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 7 +SUBLEVEL = 8 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From 1cbd16b51abc8cb31d536eda29856cdbef1a02a0 Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Tue, 31 Jan 2017 23:28:39 -0800 Subject: [PATCH 1369/1442] ANDROID: goldfish_sync: upgrade to new fence sync api goldfish_sync requires the following modifications to bring it up: - Copy and integrate goldfish_sync version of sw_sync, from out of dma-buf driver. - Don't delete timelines by itself; rely on put Change-Id: Ie88d506955dbf5c8532281f122471dc7b1c0bccb Signed-off-by: Lingfeng Yang --- drivers/staging/goldfish/Kconfig | 2 + drivers/staging/goldfish/Makefile | 1 + drivers/staging/goldfish/goldfish_sync.c | 104 +++---- .../goldfish/goldfish_sync_timeline_fence.c | 254 ++++++++++++++++++ .../goldfish/goldfish_sync_timeline_fence.h | 58 ++++ 5 files changed, 355 insertions(+), 64 deletions(-) create mode 100644 drivers/staging/goldfish/goldfish_sync_timeline_fence.c create mode 100644 drivers/staging/goldfish/goldfish_sync_timeline_fence.h diff --git a/drivers/staging/goldfish/Kconfig b/drivers/staging/goldfish/Kconfig index c579141a7bed..d293bbc22c79 100644 --- a/drivers/staging/goldfish/Kconfig +++ b/drivers/staging/goldfish/Kconfig @@ -7,6 +7,8 @@ config GOLDFISH_AUDIO config GOLDFISH_SYNC tristate "Goldfish AVD Sync Driver" depends on GOLDFISH + depends on SW_SYNC + depends on SYNC_FILE ---help--- Emulated sync fences for the Goldfish Android Virtual Device diff --git a/drivers/staging/goldfish/Makefile b/drivers/staging/goldfish/Makefile index 0cf525588210..fbebfb7c781c 100644 --- a/drivers/staging/goldfish/Makefile +++ b/drivers/staging/goldfish/Makefile @@ -8,4 +8,5 @@ obj-$(CONFIG_MTD_GOLDFISH_NAND) += goldfish_nand.o # and sync ccflags-y := -Idrivers/staging/android +obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync_timeline_fence.o obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync.o diff --git a/drivers/staging/goldfish/goldfish_sync.c b/drivers/staging/goldfish/goldfish_sync.c index ba8def29901e..aeccec1c51b1 100644 --- a/drivers/staging/goldfish/goldfish_sync.c +++ b/drivers/staging/goldfish/goldfish_sync.c @@ -30,10 +30,13 @@ #include #include + +#include #include +#include +#include -#include "sw_sync.h" -#include "sync.h" +#include "goldfish_sync_timeline_fence.h" #define ERR(...) printk(KERN_ERR __VA_ARGS__); @@ -45,13 +48,13 @@ /* The Goldfish sync driver is designed to provide a interface * between the underlying host's sync device and the kernel's - * sw_sync. + * fence sync framework.. * The purpose of the device/driver is to enable lightweight * creation and signaling of timelines and fences * in order to synchronize the guest with host-side graphics events. * * Each time the interrupt trips, the driver - * may perform a sw_sync operation. + * may perform a sync operation. */ /* The operations are: */ @@ -158,7 +161,7 @@ struct goldfish_sync_state { static struct goldfish_sync_state global_sync_state[1]; struct goldfish_sync_timeline_obj { - struct sw_sync_timeline *sw_sync_tl; + struct goldfish_sync_timeline *sync_tl; uint32_t current_time; /* We need to be careful about when we deallocate * this |goldfish_sync_timeline_obj| struct. @@ -166,10 +169,10 @@ struct goldfish_sync_timeline_obj { * consider the triggered host-side wait that may * still be in flight when the guest close()'s a * goldfish_sync device's sync context fd (and - * destroys the |sw_sync_tl| field above). + * destroys the |sync_tl| field above). * The host-side wait may raise IRQ * and tell the kernel to increment the timeline _after_ - * the |sw_sync_tl| has already been set to null. + * the |sync_tl| has already been set to null. * * From observations on OpenGL apps and CTS tests, this * happens at some very low probability upon context @@ -177,8 +180,8 @@ struct goldfish_sync_timeline_obj { * and it needs to be handled properly. Otherwise, * if we clean up the surrounding |goldfish_sync_timeline_obj| * too early, any |handle| field of any host->guest command - * might not even point to a null |sw_sync_tl| field, - * but to garbage memory or even a reclaimed |sw_sync_tl|. + * might not even point to a null |sync_tl| field, + * but to garbage memory or even a reclaimed |sync_tl|. * If we do not count such "pending waits" and kfree the object * immediately upon |goldfish_sync_timeline_destroy|, * we might get mysterous RCU stalls after running a long @@ -220,14 +223,14 @@ struct goldfish_sync_timeline_obj { }; /* We will call |delete_timeline_obj| when the last reference count - * of the kref is decremented. This deletes the sw_sync + * of the kref is decremented. This deletes the sync * timeline object along with the wrapper itself. */ static void delete_timeline_obj(struct kref* kref) { struct goldfish_sync_timeline_obj* obj = container_of(kref, struct goldfish_sync_timeline_obj, kref); - sync_timeline_destroy(&obj->sw_sync_tl->obj); - obj->sw_sync_tl = NULL; + goldfish_sync_timeline_put_internal(obj->sync_tl); + obj->sync_tl = NULL; kfree(obj); } @@ -245,21 +248,21 @@ goldfish_sync_timeline_create(void) { char timeline_name[256]; - struct sw_sync_timeline *res_sync_tl = NULL; + struct goldfish_sync_timeline *res_sync_tl = NULL; struct goldfish_sync_timeline_obj *res; DTRACE(); gensym(timeline_name); - res_sync_tl = sw_sync_timeline_create(timeline_name); + res_sync_tl = goldfish_sync_timeline_create_internal(timeline_name); if (!res_sync_tl) { - ERR("Failed to create sw_sync timeline."); + ERR("Failed to create goldfish_sw_sync timeline."); return NULL; } res = kzalloc(sizeof(struct goldfish_sync_timeline_obj), GFP_KERNEL); - res->sw_sync_tl = res_sync_tl; + res->sync_tl = res_sync_tl; res->current_time = 0; kref_init(&res->kref); @@ -277,19 +280,20 @@ goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj, int fd; char fence_name[256]; struct sync_pt *syncpt = NULL; - struct sync_fence *sync_obj = NULL; - struct sw_sync_timeline *tl; + struct sync_file *sync_file_obj = NULL; + struct goldfish_sync_timeline *tl; DTRACE(); if (!obj) return -1; - tl = obj->sw_sync_tl; + tl = obj->sync_tl; - syncpt = sw_sync_pt_create(tl, val); + syncpt = goldfish_sync_pt_create_internal( + tl, sizeof(struct sync_pt) + 4, val); if (!syncpt) { ERR("could not create sync point! " - "sync_timeline=0x%p val=%d", + "goldfish_sync_timeline=0x%p val=%d", tl, val); return -1; } @@ -303,24 +307,26 @@ goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj, gensym(fence_name); - sync_obj = sync_fence_create(fence_name, syncpt); - if (!sync_obj) { + sync_file_obj = sync_file_create(&syncpt->base); + if (!sync_file_obj) { ERR("could not create sync fence! " - "sync_timeline=0x%p val=%d sync_pt=0x%p", + "goldfish_sync_timeline=0x%p val=%d sync_pt=0x%p", tl, val, syncpt); goto err_cleanup_fd_pt; } - DPRINT("installing sync fence into fd %d sync_obj=0x%p", fd, sync_obj); - sync_fence_install(sync_obj, fd); + DPRINT("installing sync fence into fd %d sync_file_obj=0x%p", + fd, sync_file_obj); + fd_install(fd, sync_file_obj->file); kref_get(&obj->kref); return fd; err_cleanup_fd_pt: + fput(sync_file_obj->file); put_unused_fd(fd); err_cleanup_pt: - sync_pt_free(syncpt); + fence_put(&syncpt->base); return -1; } @@ -335,7 +341,7 @@ goldfish_sync_timeline_inc(struct goldfish_sync_timeline_obj *obj, uint32_t inc) if (!obj) return; DPRINT("timeline_obj=0x%p", obj); - sw_sync_timeline_inc(obj->sw_sync_tl, inc); + goldfish_sync_timeline_signal_internal(obj->sync_tl, inc); DPRINT("incremented timeline. increment max_time"); obj->current_time += inc; @@ -847,7 +853,8 @@ int goldfish_sync_probe(struct platform_device *pdev) return -ENODEV; } - sync_state->reg_base = devm_ioremap(&pdev->dev, ioresource->start, PAGE_SIZE); + sync_state->reg_base = + devm_ioremap(&pdev->dev, ioresource->start, PAGE_SIZE); if (sync_state->reg_base == NULL) { ERR("Could not ioremap"); return -ENOMEM; @@ -880,9 +887,11 @@ int goldfish_sync_probe(struct platform_device *pdev) struct goldfish_sync_hostcmd *batch_addr_hostcmd; struct goldfish_sync_guestcmd *batch_addr_guestcmd; - batch_addr_hostcmd = devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_hostcmd), + batch_addr_hostcmd = + devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_hostcmd), GFP_KERNEL); - batch_addr_guestcmd = devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_guestcmd), + batch_addr_guestcmd = + devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_guestcmd), GFP_KERNEL); if (!setup_verify_batch_cmd_addr(sync_state, @@ -952,36 +961,3 @@ MODULE_AUTHOR("Google, Inc."); MODULE_DESCRIPTION("Android QEMU Sync Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); - -/* This function is only to run a basic test of sync framework. - * It creates a timeline and fence object whose signal point is at 1. - * The timeline is incremented, and we use the sync framework's - * sync_fence_wait on that fence object. If everything works out, - * we should not hang in the wait and return immediately. - * There is no way to explicitly run this test yet, but it - * can be used by inserting it at the end of goldfish_sync_probe. - */ -void test_kernel_sync(void) -{ - struct goldfish_sync_timeline_obj *test_timeline; - int test_fence_fd; - - DTRACE(); - - DPRINT("test sw_sync"); - - test_timeline = goldfish_sync_timeline_create(); - DPRINT("sw_sync_timeline_create -> 0x%p", test_timeline); - - test_fence_fd = goldfish_sync_fence_create(test_timeline, 1); - DPRINT("sync_fence_create -> %d", test_fence_fd); - - DPRINT("incrementing test timeline"); - goldfish_sync_timeline_inc(test_timeline, 1); - - DPRINT("test waiting (should NOT hang)"); - sync_fence_wait( - sync_fence_fdget(test_fence_fd), -1); - - DPRINT("test waiting (afterward)"); -} diff --git a/drivers/staging/goldfish/goldfish_sync_timeline_fence.c b/drivers/staging/goldfish/goldfish_sync_timeline_fence.c new file mode 100644 index 000000000000..e671618cf888 --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync_timeline_fence.c @@ -0,0 +1,254 @@ +#include +#include +#include +#include +#include + +#include "goldfish_sync_timeline_fence.h" + +/* + * Timeline-based sync for Goldfish Sync + * Based on "Sync File validation framework" + * (drivers/dma-buf/sw_sync.c) + * + * Copyright (C) 2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/** + * struct goldfish_sync_timeline - sync object + * @kref: reference count on fence. + * @name: name of the goldfish_sync_timeline. Useful for debugging + * @child_list_head: list of children sync_pts for this goldfish_sync_timeline + * @child_list_lock: lock protecting @child_list_head and fence.status + * @active_list_head: list of active (unsignaled/errored) sync_pts + */ +struct goldfish_sync_timeline { + struct kref kref; + char name[32]; + + /* protected by child_list_lock */ + u64 context; + int value; + + struct list_head child_list_head; + spinlock_t child_list_lock; + + struct list_head active_list_head; +}; + +static inline struct goldfish_sync_timeline *fence_parent(struct fence *fence) +{ + return container_of(fence->lock, struct goldfish_sync_timeline, + child_list_lock); +} + +static const struct fence_ops goldfish_sync_timeline_fence_ops; + +static inline struct sync_pt *goldfish_sync_fence_to_sync_pt(struct fence *fence) +{ + if (fence->ops != &goldfish_sync_timeline_fence_ops) + return NULL; + return container_of(fence, struct sync_pt, base); +} + +/** + * goldfish_sync_timeline_create_internal() - creates a sync object + * @name: sync_timeline name + * + * Creates a new sync_timeline. Returns the sync_timeline object or NULL in + * case of error. + */ +struct goldfish_sync_timeline +*goldfish_sync_timeline_create_internal(const char *name) +{ + struct goldfish_sync_timeline *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return NULL; + + kref_init(&obj->kref); + obj->context = fence_context_alloc(1); + strlcpy(obj->name, name, sizeof(obj->name)); + + INIT_LIST_HEAD(&obj->child_list_head); + INIT_LIST_HEAD(&obj->active_list_head); + spin_lock_init(&obj->child_list_lock); + + return obj; +} + +static void goldfish_sync_timeline_free_internal(struct kref *kref) +{ + struct goldfish_sync_timeline *obj = + container_of(kref, struct goldfish_sync_timeline, kref); + + kfree(obj); +} + +static void goldfish_sync_timeline_get_internal( + struct goldfish_sync_timeline *obj) +{ + kref_get(&obj->kref); +} + +void goldfish_sync_timeline_put_internal(struct goldfish_sync_timeline *obj) +{ + kref_put(&obj->kref, goldfish_sync_timeline_free_internal); +} + +/** + * goldfish_sync_timeline_signal() - + * signal a status change on a goldfish_sync_timeline + * @obj: sync_timeline to signal + * @inc: num to increment on timeline->value + * + * A sync implementation should call this any time one of it's fences + * has signaled or has an error condition. + */ +void goldfish_sync_timeline_signal_internal(struct goldfish_sync_timeline *obj, + unsigned int inc) +{ + unsigned long flags; + struct sync_pt *pt, *next; + + spin_lock_irqsave(&obj->child_list_lock, flags); + + obj->value += inc; + + list_for_each_entry_safe(pt, next, &obj->active_list_head, + active_list) { + if (fence_is_signaled_locked(&pt->base)) + list_del_init(&pt->active_list); + } + + spin_unlock_irqrestore(&obj->child_list_lock, flags); +} + +/** + * goldfish_sync_pt_create_internal() - creates a sync pt + * @parent: fence's parent sync_timeline + * @size: size to allocate for this pt + * @inc: value of the fence + * + * Creates a new sync_pt as a child of @parent. @size bytes will be + * allocated allowing for implementation specific data to be kept after + * the generic sync_timeline struct. Returns the sync_pt object or + * NULL in case of error. + */ +struct sync_pt *goldfish_sync_pt_create_internal( + struct goldfish_sync_timeline *obj, int size, + unsigned int value) +{ + unsigned long flags; + struct sync_pt *pt; + + if (size < sizeof(*pt)) + return NULL; + + pt = kzalloc(size, GFP_KERNEL); + if (!pt) + return NULL; + + spin_lock_irqsave(&obj->child_list_lock, flags); + goldfish_sync_timeline_get_internal(obj); + fence_init(&pt->base, &goldfish_sync_timeline_fence_ops, &obj->child_list_lock, + obj->context, value); + list_add_tail(&pt->child_list, &obj->child_list_head); + INIT_LIST_HEAD(&pt->active_list); + spin_unlock_irqrestore(&obj->child_list_lock, flags); + return pt; +} + +static const char *goldfish_sync_timeline_fence_get_driver_name( + struct fence *fence) +{ + return "sw_sync"; +} + +static const char *goldfish_sync_timeline_fence_get_timeline_name( + struct fence *fence) +{ + struct goldfish_sync_timeline *parent = fence_parent(fence); + + return parent->name; +} + +static void goldfish_sync_timeline_fence_release(struct fence *fence) +{ + struct sync_pt *pt = goldfish_sync_fence_to_sync_pt(fence); + struct goldfish_sync_timeline *parent = fence_parent(fence); + unsigned long flags; + + spin_lock_irqsave(fence->lock, flags); + list_del(&pt->child_list); + if (!list_empty(&pt->active_list)) + list_del(&pt->active_list); + spin_unlock_irqrestore(fence->lock, flags); + + goldfish_sync_timeline_put_internal(parent); + fence_free(fence); +} + +static bool goldfish_sync_timeline_fence_signaled(struct fence *fence) +{ + struct goldfish_sync_timeline *parent = fence_parent(fence); + + return (fence->seqno > parent->value) ? false : true; +} + +static bool goldfish_sync_timeline_fence_enable_signaling(struct fence *fence) +{ + struct sync_pt *pt = goldfish_sync_fence_to_sync_pt(fence); + struct goldfish_sync_timeline *parent = fence_parent(fence); + + if (goldfish_sync_timeline_fence_signaled(fence)) + return false; + + list_add_tail(&pt->active_list, &parent->active_list_head); + return true; +} + +static void goldfish_sync_timeline_fence_disable_signaling(struct fence *fence) +{ + struct sync_pt *pt = container_of(fence, struct sync_pt, base); + + list_del_init(&pt->active_list); +} + +static void goldfish_sync_timeline_fence_value_str(struct fence *fence, + char *str, int size) +{ + snprintf(str, size, "%d", fence->seqno); +} + +static void goldfish_sync_timeline_fence_timeline_value_str( + struct fence *fence, + char *str, int size) +{ + struct goldfish_sync_timeline *parent = fence_parent(fence); + + snprintf(str, size, "%d", parent->value); +} + +static const struct fence_ops goldfish_sync_timeline_fence_ops = { + .get_driver_name = goldfish_sync_timeline_fence_get_driver_name, + .get_timeline_name = goldfish_sync_timeline_fence_get_timeline_name, + .enable_signaling = goldfish_sync_timeline_fence_enable_signaling, + .disable_signaling = goldfish_sync_timeline_fence_disable_signaling, + .signaled = goldfish_sync_timeline_fence_signaled, + .wait = fence_default_wait, + .release = goldfish_sync_timeline_fence_release, + .fence_value_str = goldfish_sync_timeline_fence_value_str, + .timeline_value_str = goldfish_sync_timeline_fence_timeline_value_str, +}; diff --git a/drivers/staging/goldfish/goldfish_sync_timeline_fence.h b/drivers/staging/goldfish/goldfish_sync_timeline_fence.h new file mode 100644 index 000000000000..fc25924652c1 --- /dev/null +++ b/drivers/staging/goldfish/goldfish_sync_timeline_fence.h @@ -0,0 +1,58 @@ +#include +#include + +/** + * struct sync_pt - sync_pt object + * @base: base fence object + * @child_list: sync timeline child's list + * @active_list: sync timeline active child's list + */ +struct sync_pt { + struct fence base; + struct list_head child_list; + struct list_head active_list; +}; + +/** + * goldfish_sync_timeline_create_internal() - creates a sync object + * @name: goldfish_sync_timeline name + * + * Creates a new goldfish_sync_timeline. + * Returns the goldfish_sync_timeline object or NULL in case of error. + */ +struct goldfish_sync_timeline +*goldfish_sync_timeline_create_internal(const char *name); + +/** + * goldfish_sync_pt_create_internal() - creates a sync pt + * @parent: fence's parent goldfish_sync_timeline + * @size: size to allocate for this pt + * @inc: value of the fence + * + * Creates a new sync_pt as a child of @parent. @size bytes will be + * allocated allowing for implementation specific data to be kept after + * the generic sync_timeline struct. Returns the sync_pt object or + * NULL in case of error. + */ +struct sync_pt +*goldfish_sync_pt_create_internal(struct goldfish_sync_timeline *obj, + int size, unsigned int value); + +/** + * goldfish_sync_timeline_signal_internal() - + * signal a status change on a sync_timeline + * @obj: goldfish_sync_timeline to signal + * @inc: num to increment on timeline->value + * + * A sync implementation should call this any time one of it's fences + * has signaled or has an error condition. + */ +void goldfish_sync_timeline_signal_internal(struct goldfish_sync_timeline *obj, + unsigned int inc); + +/** + * goldfish_sync_timeline_put_internal() - dec refcount of a sync_timeline + * and clean up memory if it was the last ref. + * @obj: goldfish_sync_timeline to decref + */ +void goldfish_sync_timeline_put_internal(struct goldfish_sync_timeline *obj); -- GitLab From ec6a764367a42674462536157afeadcb001a6f1e Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Wed, 1 Feb 2017 16:12:57 -0800 Subject: [PATCH 1370/1442] ANDROID: goldfish_sync: update defconfig for 4.9-compatible version Change-Id: I46dbf059f3028a280a872b9b46c2e587acc2585a Signed-off-by: Lingfeng Yang --- arch/x86/configs/i386_ranchu_defconfig | 2 ++ arch/x86/configs/x86_64_ranchu_defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig index 11181a63a311..a1c83c4e78ae 100644 --- a/arch/x86/configs/i386_ranchu_defconfig +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -361,11 +361,13 @@ CONFIG_ASHMEM=y CONFIG_ANDROID_LOW_MEMORY_KILLER=y CONFIG_SYNC=y CONFIG_SW_SYNC=y +CONFIG_SYNC_FILE=y CONFIG_ION=y CONFIG_GOLDFISH_AUDIO=y CONFIG_SND_HDA_INTEL=y CONFIG_GOLDFISH=y CONFIG_GOLDFISH_PIPE=y +CONFIG_GOLDFISH_SYNC=y CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y CONFIG_ISCSI_IBFT_FIND=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig index a62b86769cde..d50434f501fb 100644 --- a/arch/x86/configs/x86_64_ranchu_defconfig +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -358,11 +358,13 @@ CONFIG_ASHMEM=y CONFIG_ANDROID_LOW_MEMORY_KILLER=y CONFIG_SYNC=y CONFIG_SW_SYNC=y +CONFIG_SYNC_FILE=y CONFIG_ION=y CONFIG_GOLDFISH_AUDIO=y CONFIG_SND_HDA_INTEL=y CONFIG_GOLDFISH=y CONFIG_GOLDFISH_PIPE=y +CONFIG_GOLDFISH_SYNC=y CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y CONFIG_ISCSI_IBFT_FIND=y -- GitLab From d37a658513f50c46235eec5f41a432b645e8f767 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 7 Feb 2017 11:09:30 -0800 Subject: [PATCH 1371/1442] ANDROID: goldfish_sync: Isolate single module to fix compilation ERROR: "goldfish_sync_timeline_signal_internal" [drivers/staging/goldfish/goldfish_sync.ko] undefined! ERROR: "goldfish_sync_timeline_create_internal" [drivers/staging/goldfish/goldfish_sync.ko] undefined! ERROR: "goldfish_sync_pt_create_internal" [drivers/staging/goldfish/goldfish_sync.ko] undefined! ERROR: "goldfish_sync_timeline_put_internal" [drivers/staging/goldfish/goldfish_sync.ko] undefined! Change-Id: I2a97c2a33b38ceeb696d28187539c158aa97a620 Signed-off-by: Dmitry Shmidt --- drivers/staging/goldfish/Makefile | 2 +- .../goldfish/{goldfish_sync.c => goldfish_sync_timeline.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename drivers/staging/goldfish/{goldfish_sync.c => goldfish_sync_timeline.c} (100%) diff --git a/drivers/staging/goldfish/Makefile b/drivers/staging/goldfish/Makefile index fbebfb7c781c..3313fce4e940 100644 --- a/drivers/staging/goldfish/Makefile +++ b/drivers/staging/goldfish/Makefile @@ -8,5 +8,5 @@ obj-$(CONFIG_MTD_GOLDFISH_NAND) += goldfish_nand.o # and sync ccflags-y := -Idrivers/staging/android -obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync_timeline_fence.o +goldfish_sync-objs := goldfish_sync_timeline_fence.o goldfish_sync_timeline.o obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync.o diff --git a/drivers/staging/goldfish/goldfish_sync.c b/drivers/staging/goldfish/goldfish_sync_timeline.c similarity index 100% rename from drivers/staging/goldfish/goldfish_sync.c rename to drivers/staging/goldfish/goldfish_sync_timeline.c -- GitLab From f2dd18a00f1b20cc84aa3b5e99991fff789d2cfd Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 8 Feb 2017 13:11:26 -0800 Subject: [PATCH 1372/1442] ANDROID: goldfish_sync: Fix sync_file_obj is NULL but dereferenced problem Change-Id: I22c1e8c96a62ceaf3b0088b0b81dced6889334ab Signed-off-by: Dmitry Shmidt --- drivers/staging/goldfish/goldfish_sync_timeline.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/goldfish/goldfish_sync_timeline.c b/drivers/staging/goldfish/goldfish_sync_timeline.c index aeccec1c51b1..0dac94f5accd 100644 --- a/drivers/staging/goldfish/goldfish_sync_timeline.c +++ b/drivers/staging/goldfish/goldfish_sync_timeline.c @@ -323,7 +323,6 @@ goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj, return fd; err_cleanup_fd_pt: - fput(sync_file_obj->file); put_unused_fd(fd); err_cleanup_pt: fence_put(&syncpt->base); -- GitLab From ac82d16f334316179826c93cd61abe838812b7db Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 20 Sep 2016 18:42:22 -0700 Subject: [PATCH 1373/1442] ANDROID: sched: Add Kconfig option DEFAULT_USE_ENERGY_AWARE to set ENERGY_AWARE feature flag The ENERGY_AWARE sched feature flag cannot be set unless CONFIG_SCHED_DEBUG is enabled. So this patch allows the flag to default to true at build time if the config is set. Change-Id: I8835a571fdb7a8f8ee6a54af1e11a69f3b5ce8e6 Signed-off-by: John Stultz --- init/Kconfig | 10 ++++++++++ kernel/sched/features.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index d7dd52f8ec23..2e15b932aeea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1360,6 +1360,16 @@ config SCHED_TUNE If unsure, say N. +config DEFAULT_USE_ENERGY_AWARE + bool "Default to enabling the Energy Aware Scheduler feature" + default n + help + This option defaults the ENERGY_AWARE scheduling feature to true, + as without SCHED_DEBUG set this feature can't be enabled or disabled + via sysctl. + + Say N if unsure. + config SYSFS_DEPRECATED bool "Enable deprecated sysfs features to support old userspace tools" depends on SYSFS diff --git a/kernel/sched/features.h b/kernel/sched/features.h index b634151ce286..55e461055332 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -73,4 +73,8 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true) * Energy aware scheduling. Use platform energy model to guide scheduling * decisions optimizing for energy efficiency. */ +#ifdef CONFIG_DEFAULT_USE_ENERGY_AWARE +SCHED_FEAT(ENERGY_AWARE, true) +#else SCHED_FEAT(ENERGY_AWARE, false) +#endif -- GitLab From 610c2b7ff8f6d5cbad76bbe522f7f367d7116b0f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jan 2017 15:00:45 -0600 Subject: [PATCH 1374/1442] PCI/ASPM: Handle PCI-to-PCIe bridges as roots of PCIe hierarchies commit 030305d69fc6963c16003f50d7e8d74b02d0a143 upstream. In a struct pcie_link_state, link->root points to the pcie_link_state of the root of the PCIe hierarchy. For the topmost link, this points to itself (link->root = link). For others, we copy the pointer from the parent (link->root = link->parent->root). Previously we recognized that Root Ports originated PCIe hierarchies, but we treated PCI/PCI-X to PCIe Bridges as being in the middle of the hierarchy, and when we tried to copy the pointer from link->parent->root, there was no parent, and we dereferenced a NULL pointer: BUG: unable to handle kernel NULL pointer dereference at 0000000000000090 IP: [] pcie_aspm_init_link_state+0x170/0x820 Recognize that PCI/PCI-X to PCIe Bridges originate PCIe hierarchies just like Root Ports do, so link->root for these devices should also point to itself. Fixes: 51ebfc92b72b ("PCI: Enumerate switches below PCI-to-PCIe bridges") Link: https://bugzilla.kernel.org/show_bug.cgi?id=193411 Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1022181 Tested-by: lists@ssl-mail.com Tested-by: Jayachandran C. Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aspm.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 0ec649d961d7..b0916b126923 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -518,25 +518,32 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) return NULL; + INIT_LIST_HEAD(&link->sibling); INIT_LIST_HEAD(&link->children); INIT_LIST_HEAD(&link->link); link->pdev = pdev; - if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) { + + /* + * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe + * hierarchies. + */ + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(pdev) == PCI_EXP_TYPE_PCIE_BRIDGE) { + link->root = link; + } else { struct pcie_link_state *parent; + parent = pdev->bus->parent->self->link_state; if (!parent) { kfree(link); return NULL; } + link->parent = parent; + link->root = link->parent->root; list_add(&link->link, &parent->children); } - /* Setup a pointer to the root port link */ - if (!link->parent) - link->root = link; - else - link->root = link->parent->root; list_add(&link->sibling, &link_list); pdev->link_state = link; -- GitLab From 13e6ef99d23b05807e7f8a72f45e3d8260b61570 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 1 Dec 2016 15:08:37 -0500 Subject: [PATCH 1375/1442] ext4: validate s_first_meta_bg at mount time commit 3a4b77cd47bb837b8557595ec7425f281f2ca1fe upstream. Ralf Spenneberg reported that he hit a kernel crash when mounting a modified ext4 image. And it turns out that kernel crashed when calculating fs overhead (ext4_calculate_overhead()), this is because the image has very large s_first_meta_bg (debug code shows it's 842150400), and ext4 overruns the memory in count_overhead() when setting bitmap buffer, which is PAGE_SIZE. ext4_calculate_overhead(): buf = get_zeroed_page(GFP_NOFS); <=== PAGE_SIZE buffer blks = count_overhead(sb, i, buf); count_overhead(): for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { <=== j = 842150400 ext4_set_bit(EXT4_B2C(sbi, s++), buf); <=== buffer overrun count++; } This can be reproduced easily for me by this script: #!/bin/bash rm -f fs.img mkdir -p /mnt/ext4 fallocate -l 16M fs.img mke2fs -t ext4 -O bigalloc,meta_bg,^resize_inode -F fs.img debugfs -w -R "ssv first_meta_bg 842150400" fs.img mount -o loop fs.img /mnt/ext4 Fix it by validating s_first_meta_bg first at mount time, and refusing to mount if its value exceeds the largest possible meta_bg number. Reported-by: Ralf Spenneberg Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 478630af0d19..bbc316db9495 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3827,6 +3827,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); + if (ext4_has_feature_meta_bg(sb)) { + if (le32_to_cpu(es->s_first_meta_bg) >= db_count) { + ext4_msg(sb, KERN_WARNING, + "first meta block group too large: %u " + "(group descriptor block count %u)", + le32_to_cpu(es->s_first_meta_bg), db_count); + goto failed_mount; + } + } sbi->s_group_desc = ext4_kvmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); -- GitLab From f0c7412edfc117e7792b8b1f5ee201b31cf9dcdb Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 27 Jan 2017 22:25:52 +0000 Subject: [PATCH 1376/1442] x86/efi: Always map the first physical page into the EFI pagetables commit bf29bddf0417a4783da3b24e8c9e017ac649326f upstream. Commit: 129766708 ("x86/efi: Only map RAM into EFI page tables if in mixed-mode") stopped creating 1:1 mappings for all RAM, when running in native 64-bit mode. It turns out though that there are 64-bit EFI implementations in the wild (this particular problem has been reported on a Lenovo Yoga 710-11IKB), which still make use of the first physical page for their own private use, even though they explicitly mark it EFI_CONVENTIONAL_MEMORY in the memory map. In case there is no mapping for this particular frame in the EFI pagetables, as soon as firmware tries to make use of it, a triple fault occurs and the system reboots (in case of the Yoga 710-11IKB this is very early during bootup). Fix that by always mapping the first page of physical memory into the EFI pagetables. We're free to hand this page to the BIOS, as trim_bios_range() will reserve the first page and isolate it away from memory allocators anyway. Note that just reverting 129766708 alone is not enough on v4.9-rc1+ to fix the regression on affected hardware, as this commit: ab72a27da ("x86/efi: Consolidate region mapping logic") later made the first physical frame not to be mapped anyway. Reported-by: Hanka Pavlikova Signed-off-by: Jiri Kosina Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Borislav Petkov Cc: Laura Abbott Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vojtech Pavlik Cc: Waiman Long Cc: linux-efi@vger.kernel.org Fixes: 129766708 ("x86/efi: Only map RAM into EFI page tables if in mixed-mode") Link: http://lkml.kernel.org/r/20170127222552.22336-1-matt@codeblueprint.co.uk [ Tidied up the changelog and the comment. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/platform/efi/efi_64.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 319148bd4b05..2f25a363068c 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -268,6 +268,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.use_pgd = true; + /* + * Certain firmware versions are way too sentimential and still believe + * they are exclusive and unquestionable owners of the first physical page, + * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY + * (but then write-access it later during SetVirtualAddressMap()). + * + * Create a 1:1 mapping for this page, to avoid triple faults during early + * boot with such firmware. We are free to hand this page to the BIOS, + * as trim_bios_range() will reserve the first page and isolate it away + * from memory allocators anyway. + */ + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, _PAGE_RW)) { + pr_err("Failed to create 1:1 mapping for the first page!\n"); + return 1; + } + /* * When making calls to the firmware everything needs to be 1:1 * mapped and addressable with 32-bit pointers. Map the kernel -- GitLab From f2e24dd918189b355a2f7eeafede54778d18172d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Feb 2017 17:45:02 +0000 Subject: [PATCH 1377/1442] efi/fdt: Avoid FDT manipulation after ExitBootServices() commit c8f325a59cfc718d13a50fbc746ed9b415c25e92 upstream. Some AArch64 UEFI implementations disable the MMU in ExitBootServices(), after which unaligned accesses to RAM are no longer supported. Commit: abfb7b686a3e ("efi/libstub/arm*: Pass latest memory map to the kernel") fixed an issue in the memory map handling of the stub FDT code, but inadvertently created an issue with such firmware, by moving some of the FDT manipulation to after the invocation of ExitBootServices(). Given that the stub's libfdt implementation uses the ordinary, accelerated string functions, which rely on hardware handling of unaligned accesses, manipulating the FDT with the MMU off may result in alignment faults. So fix the situation by moving the update_fdt_memmap() call into the callback function invoked by efi_exit_boot_services() right before it calls the ExitBootServices() UEFI service (which is arguably a better place for it anyway) Note that disabling the MMU in ExitBootServices() is not compliant with the UEFI spec, and carries great risk due to the fact that switching from cached to uncached memory accesses halfway through compiler generated code (i.e., involving a stack) can never be done in a way that is architecturally safe. Fixes: abfb7b686a3e ("efi/libstub/arm*: Pass latest memory map to the kernel") Signed-off-by: Ard Biesheuvel Tested-by: Riku Voipio Cc: mark.rutland@arm.com Cc: linux-efi@vger.kernel.org Cc: matt@codeblueprint.co.uk Cc: leif.lindholm@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1485971102-23330-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/fdt.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 921dfa047202..260c4b4b492e 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -187,6 +187,7 @@ static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map) struct exit_boot_struct { efi_memory_desc_t *runtime_map; int *runtime_entry_count; + void *new_fdt_addr; }; static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, @@ -202,7 +203,7 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, efi_get_virtmap(*map->map, *map->map_size, *map->desc_size, p->runtime_map, p->runtime_entry_count); - return EFI_SUCCESS; + return update_fdt_memmap(p->new_fdt_addr, map); } /* @@ -300,22 +301,13 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, priv.runtime_map = runtime_map; priv.runtime_entry_count = &runtime_entry_count; + priv.new_fdt_addr = (void *)*new_fdt_addr; status = efi_exit_boot_services(sys_table, handle, &map, &priv, exit_boot_func); if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; - status = update_fdt_memmap((void *)*new_fdt_addr, &map); - if (status != EFI_SUCCESS) { - /* - * The kernel won't get far without the memory map, but - * may still be able to print something meaningful so - * return success here. - */ - return EFI_SUCCESS; - } - /* Install the new virtual address map */ svam = sys_table->runtime->set_virtual_address_map; status = svam(runtime_entry_count * desc_size, desc_size, -- GitLab From dfd713307adb8747b47cb7bf6c08cb887b857c43 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 31 Jan 2017 18:35:37 -0800 Subject: [PATCH 1378/1442] xtensa: fix noMMU build on cores with MMU commit 4b3e6f2ef3722f1a6a97b6034ed492c1a21fd4ae upstream. Commit bf15f86b343ed8 ("xtensa: initialize MMU before jumping to reset vector") calls MMU management functions even when CONFIG_MMU is not selected. That breaks noMMU build on cores with MMU. Don't manage MMU when CONFIG_MMU is not selected. Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 88a044af7504..32cdc2c52e98 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -540,7 +540,7 @@ subsys_initcall(topology_init); void cpu_reset(void) { -#if XCHAL_HAVE_PTP_MMU +#if XCHAL_HAVE_PTP_MMU && IS_ENABLED(CONFIG_MMU) local_irq_disable(); /* * We have full MMU: all autoload ways, ways 7, 8 and 9 of DTLB must -- GitLab From a18c4584a48931e8048508469bcdb53c6082221a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Jan 2017 11:26:38 +0100 Subject: [PATCH 1379/1442] HID: cp2112: fix sleep-while-atomic commit 7a7b5df84b6b4e5d599c7289526eed96541a0654 upstream. A recent commit fixing DMA-buffers on stack added a shared transfer buffer protected by a spinlock. This is broken as the USB HID request callbacks can sleep. Fix this up by replacing the spinlock with a mutex. Fixes: 1ffb3c40ffb5 ("HID: cp2112: make transfer buffers DMA capable") Signed-off-by: Johan Hovold Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-cp2112.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 60d30203a5fa..ffc941c40a0f 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -167,7 +167,7 @@ struct cp2112_device { atomic_t xfer_avail; struct gpio_chip gc; u8 *in_out_buffer; - spinlock_t lock; + struct mutex lock; }; static int gpio_push_pull = 0xFF; @@ -179,10 +179,9 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, @@ -206,7 +205,7 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) ret = 0; exit: - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); return ret <= 0 ? ret : -EIO; } @@ -215,10 +214,9 @@ static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); buf[0] = CP2112_GPIO_SET; buf[1] = value ? 0xff : 0; @@ -230,7 +228,7 @@ static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) if (ret < 0) hid_err(hdev, "error setting GPIO values: %d\n", ret); - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); } static int cp2112_gpio_get(struct gpio_chip *chip, unsigned offset) @@ -238,10 +236,9 @@ static int cp2112_gpio_get(struct gpio_chip *chip, unsigned offset) struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf, CP2112_GPIO_GET_LENGTH, HID_FEATURE_REPORT, @@ -255,7 +252,7 @@ static int cp2112_gpio_get(struct gpio_chip *chip, unsigned offset) ret = (buf[1] >> offset) & 1; exit: - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); return ret; } @@ -266,10 +263,9 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, struct cp2112_device *dev = gpiochip_get_data(chip); struct hid_device *hdev = dev->hdev; u8 *buf = dev->in_out_buffer; - unsigned long flags; int ret; - spin_lock_irqsave(&dev->lock, flags); + mutex_lock(&dev->lock); ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT, @@ -290,7 +286,7 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, goto fail; } - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); /* * Set gpio value when output direction is already set, @@ -301,7 +297,7 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip, return 0; fail: - spin_unlock_irqrestore(&dev->lock, flags); + mutex_unlock(&dev->lock); return ret < 0 ? ret : -EIO; } @@ -1057,7 +1053,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) if (!dev->in_out_buffer) return -ENOMEM; - spin_lock_init(&dev->lock); + mutex_init(&dev->lock); ret = hid_parse(hdev); if (ret) { -- GitLab From 7396685a1bca323b96fd79b836ae22b7569d7068 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Jan 2017 11:26:39 +0100 Subject: [PATCH 1380/1442] HID: cp2112: fix gpio-callback error handling commit 8e9faa15469ed7c7467423db4c62aeed3ff4cae3 upstream. In case of a zero-length report, the gpio direction_input callback would currently return success instead of an errno. Fixes: 1ffb3c40ffb5 ("HID: cp2112: make transfer buffers DMA capable") Signed-off-by: Johan Hovold Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-cp2112.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index ffc941c40a0f..e06c1344c913 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -206,7 +206,7 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset) exit: mutex_unlock(&dev->lock); - return ret <= 0 ? ret : -EIO; + return ret < 0 ? ret : -EIO; } static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value) -- GitLab From 20658b3df8b98cbdc433c824730471160a087afb Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 30 Jan 2017 12:35:28 +0100 Subject: [PATCH 1381/1442] pinctrl: baytrail: Add missing spinlock usage in byt_gpio_irq_handler commit cdca06e4e85974d8a3503ab15709dbbaf90d3dd1 upstream. According to VLI64 Intel Atom E3800 Specification Update (#329901) concurrent read accesses may result in returning 0xffffffff and write accesses may be dropped silently. To workaround all accesses must be protected by locks. Signed-off-by: Alexander Stein Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-baytrail.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 079015385fd8..7f2263d61063 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1612,7 +1612,9 @@ static void byt_gpio_irq_handler(struct irq_desc *desc) continue; } + raw_spin_lock(&vg->lock); pending = readl(reg); + raw_spin_unlock(&vg->lock); for_each_set_bit(pin, &pending, 32) { virq = irq_find_mapping(vg->chip.irqdomain, base + pin); generic_handle_irq(virq); -- GitLab From c9fb422fd93883b65f1bb00e6e9206a5865245fc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 27 Jan 2017 10:31:52 -0500 Subject: [PATCH 1382/1442] drm/amdgpu/si: fix crash on headless asics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 57bcd0a6364cd4eaa362d7ff1777e88ddf501602 upstream. Missing check for crtcs present. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=193341 https://bugs.freedesktop.org/show_bug.cgi?id=99387 Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index b13c8aaec078..6df924f72f29 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -227,6 +227,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) } WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); + if (adev->mode_info.num_crtc) + amdgpu_display_set_vga_render_state(adev, false); + gmc_v6_0_mc_stop(adev, &save); if (gmc_v6_0_wait_for_idle((void *)adev)) { @@ -256,7 +259,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } gmc_v6_0_mc_resume(adev, &save); - amdgpu_display_set_vga_render_state(adev, false); } static int gmc_v6_0_mc_init(struct amdgpu_device *adev) -- GitLab From bd5cefed1ccc94af1792cb5e07a043881f9a4207 Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Wed, 11 Jan 2017 15:47:18 -0500 Subject: [PATCH 1383/1442] drm/nouveau/disp/gt215: Fix HDA ELD handling (thus, HDMI audio) on gt215 commit d347583a39e2df609a9e40c835f72d3614665b53 upstream. Store the ELD correctly, not just enough copies of the first byte to pad out the given ELD size. Signed-off-by: Alastair Bridgewater Fixes: 120b0c39c756 ("drm/nv50-/disp: audit and version SOR_HDA_ELD method") Reviewed-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c index 6f0436df0219..f8f2f16c22a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c @@ -59,7 +59,7 @@ gt215_hda_eld(NV50_DISP_MTHD_V1) ); } for (i = 0; i < size; i++) - nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[0]); + nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[i]); for (; i < 0x60; i++) nvkm_wr32(device, 0x61c440 + soff, (i << 8)); nvkm_mask(device, 0x61c448 + soff, 0x80000003, 0x80000003); -- GitLab From 2eb8f7c4225a90f3266194535adc46b41dfc59e5 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Thu, 19 Jan 2017 22:56:30 -0500 Subject: [PATCH 1384/1442] drm/nouveau/nv1a,nv1f/disp: fix memory clock rate retrieval commit 24bf7ae359b8cca165bb30742d2b1c03a1eb23af upstream. Based on the xf86-video-nv code, NFORCE (NV1A) and NFORCE2 (NV1F) have a different way of retrieving clocks. See the nv_hw.c:nForceUpdateArbitrationSettings function in the original code for how these clocks were accessed. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=54587 Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/dispnv04/hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.c b/drivers/gpu/drm/nouveau/dispnv04/hw.c index 74856a8b8f35..e64f52464ecf 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/hw.c +++ b/drivers/gpu/drm/nouveau/dispnv04/hw.c @@ -222,6 +222,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype) uint32_t mpllP; pci_read_config_dword(pci_get_bus_and_slot(0, 3), 0x6c, &mpllP); + mpllP = (mpllP >> 8) & 0xf; if (!mpllP) mpllP = 4; @@ -232,7 +233,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype) uint32_t clock; pci_read_config_dword(pci_get_bus_and_slot(0, 5), 0x4c, &clock); - return clock; + return clock / 1000; } ret = nouveau_hw_get_pllvals(dev, plltype, &pllvals); -- GitLab From b04a39f88cedc1bce345b458e8a9309ce8a2d1ba Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Fri, 13 Jan 2017 11:54:08 +0000 Subject: [PATCH 1385/1442] crypto: api - Clear CRYPTO_ALG_DEAD bit before registering an alg commit d6040764adcb5cb6de1489422411d701c158bb69 upstream. Make sure CRYPTO_ALG_DEAD bit is cleared before proceeding with the algorithm registration. This fixes qat-dh registration when driver is restarted Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/algapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/algapi.c b/crypto/algapi.c index df939b54b09f..1fad2a6b3bbb 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -356,6 +356,7 @@ int crypto_register_alg(struct crypto_alg *alg) struct crypto_larval *larval; int err; + alg->cra_flags &= ~CRYPTO_ALG_DEAD; err = crypto_check_alg(alg); if (err) return err; -- GitLab From 53bed1f6402563da7cd56cb3d0a97780e091ab73 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 17 Jan 2017 13:46:29 +0000 Subject: [PATCH 1386/1442] crypto: arm64/aes-blk - honour iv_out requirement in CBC and CTR modes commit 11e3b725cfc282efe9d4a354153e99d86a16af08 upstream. Update the ARMv8 Crypto Extensions and the plain NEON AES implementations in CBC and CTR modes to return the next IV back to the skcipher API client. This is necessary for chaining to work correctly. Note that for CTR, this is only done if the request is a round multiple of the block size, since otherwise, chaining is impossible anyway. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-modes.S | 88 +++++++++++++++++------------------ 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index c53dbeae79f2..838dad5c209f 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt) cbz w6, .Lcbcencloop ld1 {v0.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x5 + enc_prepare w3, x2, x6 .Lcbcencloop: ld1 {v1.16b}, [x1], #16 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ - encrypt_block v0, w3, x2, x5, w6 + encrypt_block v0, w3, x2, x6, w7 st1 {v0.16b}, [x0], #16 subs w4, w4, #1 bne .Lcbcencloop + st1 {v0.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_encrypt) @@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt) cbz w6, .LcbcdecloopNx ld1 {v7.16b}, [x5] /* get iv */ - dec_prepare w3, x2, x5 + dec_prepare w3, x2, x6 .LcbcdecloopNx: #if INTERLEAVE >= 2 @@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt) .Lcbcdecloop: ld1 {v1.16b}, [x1], #16 /* get next ct block */ mov v0.16b, v1.16b /* ...and copy to v0 */ - decrypt_block v0, w3, x2, x5, w6 + decrypt_block v0, w3, x2, x6, w7 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ mov v7.16b, v1.16b /* ct is next iv */ st1 {v0.16b}, [x0], #16 @@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt) bne .Lcbcdecloop .Lcbcdecout: FRAME_POP + st1 {v7.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_decrypt) @@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt) AES_ENTRY(aes_ctr_encrypt) FRAME_PUSH - cbnz w6, .Lctrfirst /* 1st time around? */ - umov x5, v4.d[1] /* keep swabbed ctr in reg */ - rev x5, x5 -#if INTERLEAVE >= 2 - cmn w5, w4 /* 32 bit overflow? */ - bcs .Lctrinc - add x5, x5, #1 /* increment BE ctr */ - b .LctrincNx -#else - b .Lctrinc -#endif -.Lctrfirst: + cbz w6, .Lctrnotfirst /* 1st time around? */ enc_prepare w3, x2, x6 ld1 {v4.16b}, [x5] - umov x5, v4.d[1] /* keep swabbed ctr in reg */ - rev x5, x5 + +.Lctrnotfirst: + umov x8, v4.d[1] /* keep swabbed ctr in reg */ + rev x8, x8 #if INTERLEAVE >= 2 - cmn w5, w4 /* 32 bit overflow? */ + cmn w8, w4 /* 32 bit overflow? */ bcs .Lctrloop .LctrloopNx: subs w4, w4, #INTERLEAVE @@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt) #if INTERLEAVE == 2 mov v0.8b, v4.8b mov v1.8b, v4.8b - rev x7, x5 - add x5, x5, #1 + rev x7, x8 + add x8, x8, #1 ins v0.d[1], x7 - rev x7, x5 - add x5, x5, #1 + rev x7, x8 + add x8, x8, #1 ins v1.d[1], x7 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ do_encrypt_block2x @@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt) st1 {v0.16b-v1.16b}, [x0], #32 #else ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ - dup v7.4s, w5 + dup v7.4s, w8 mov v0.16b, v4.16b add v7.4s, v7.4s, v8.4s mov v1.16b, v4.16b @@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt) eor v2.16b, v7.16b, v2.16b eor v3.16b, v5.16b, v3.16b st1 {v0.16b-v3.16b}, [x0], #64 - add x5, x5, #INTERLEAVE + add x8, x8, #INTERLEAVE #endif - cbz w4, .LctroutNx -.LctrincNx: - rev x7, x5 + rev x7, x8 ins v4.d[1], x7 + cbz w4, .Lctrout b .LctrloopNx -.LctroutNx: - sub x5, x5, #1 - rev x7, x5 - ins v4.d[1], x7 - b .Lctrout .Lctr1x: adds w4, w4, #INTERLEAVE beq .Lctrout @@ -342,30 +329,39 @@ AES_ENTRY(aes_ctr_encrypt) .Lctrloop: mov v0.16b, v4.16b encrypt_block v0, w3, x2, x6, w7 + + adds x8, x8, #1 /* increment BE ctr */ + rev x7, x8 + ins v4.d[1], x7 + bcs .Lctrcarry /* overflow? */ + +.Lctrcarrydone: subs w4, w4, #1 bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ ld1 {v3.16b}, [x1], #16 eor v3.16b, v0.16b, v3.16b st1 {v3.16b}, [x0], #16 - beq .Lctrout -.Lctrinc: - adds x5, x5, #1 /* increment BE ctr */ - rev x7, x5 - ins v4.d[1], x7 - bcc .Lctrloop /* no overflow? */ - umov x7, v4.d[0] /* load upper word of ctr */ - rev x7, x7 /* ... to handle the carry */ - add x7, x7, #1 - rev x7, x7 - ins v4.d[0], x7 - b .Lctrloop + bne .Lctrloop + +.Lctrout: + st1 {v4.16b}, [x5] /* return next CTR value */ + FRAME_POP + ret + .Lctrhalfblock: ld1 {v3.8b}, [x1] eor v3.8b, v0.8b, v3.8b st1 {v3.8b}, [x0] -.Lctrout: FRAME_POP ret + +.Lctrcarry: + umov x7, v4.d[0] /* load upper word of ctr */ + rev x7, x7 /* ... to handle the carry */ + add x7, x7, #1 + rev x7, x7 + ins v4.d[0], x7 + b .Lctrcarrydone AES_ENDPROC(aes_ctr_encrypt) .ltorg -- GitLab From 3996a91e3bdc04b6e41d93a7de3e5c6b63db5cb3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Jan 2017 16:39:55 +0100 Subject: [PATCH 1387/1442] perf/core: Fix use-after-free bug commit a76a82a3e38c8d3fb6499e3dfaeb0949241ab588 upstream. Dmitry reported a KASAN use-after-free on event->group_leader. It turns out there's a hole in perf_remove_from_context() due to event_function_call() not calling its function when the task associated with the event is already dead. In this case the event will have been detached from the task, but the grouping will have been retained, such that group operations might still work properly while there are live child events etc. This does however mean that we can miss a perf_group_detach() call when the group decomposes, this in turn can then lead to use-after-free. Fix it by explicitly doing the group detach if its still required. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: syzkaller Fixes: 63b6da39bb38 ("perf: Fix perf_event_exit_task() race") Link: http://lkml.kernel.org/r/20170126153955.GD6515@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e5a8839e7076..6e440c4d17fe 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1469,7 +1469,6 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) static void list_add_event(struct perf_event *event, struct perf_event_context *ctx) { - lockdep_assert_held(&ctx->lock); WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); @@ -1624,6 +1623,8 @@ static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; + lockdep_assert_held(&event->ctx->lock); + /* * We can have double attach due to group movement in perf_event_open. */ @@ -1697,6 +1698,8 @@ static void perf_group_detach(struct perf_event *event) struct perf_event *sibling, *tmp; struct list_head *list = NULL; + lockdep_assert_held(&event->ctx->lock); + /* * We can have double detach due to exit/hot-unplug + close. */ @@ -1895,9 +1898,29 @@ __perf_remove_from_context(struct perf_event *event, */ static void perf_remove_from_context(struct perf_event *event, unsigned long flags) { - lockdep_assert_held(&event->ctx->mutex); + struct perf_event_context *ctx = event->ctx; + + lockdep_assert_held(&ctx->mutex); event_function_call(event, __perf_remove_from_context, (void *)flags); + + /* + * The above event_function_call() can NO-OP when it hits + * TASK_TOMBSTONE. In that case we must already have been detached + * from the context (by perf_event_exit_event()) but the grouping + * might still be in-tact. + */ + WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); + if ((flags & DETACH_GROUP) && + (event->attach_state & PERF_ATTACH_GROUP)) { + /* + * Since in that case we cannot possibly be scheduled, simply + * detach now. + */ + raw_spin_lock_irq(&ctx->lock); + perf_group_detach(event); + raw_spin_unlock_irq(&ctx->lock); + } } /* -- GitLab From b41615aa705626e0fbb5541e4715090370fe8c23 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Jan 2017 23:15:08 +0100 Subject: [PATCH 1388/1442] perf/core: Fix PERF_RECORD_MMAP2 prot/flags for anonymous memory commit 0b3589be9b98994ce3d5aeca52445d1f5627c4ba upstream. Andres reported that MMAP2 records for anonymous memory always have their protection field 0. Turns out, someone daft put the prot/flags generation code in the file branch, leaving them unset for anonymous memory. Reported-by: Andres Freund Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Don Zickus Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@kernel.org Cc: anton@ozlabs.org Cc: namhyung@kernel.org Fixes: f972eb63b100 ("perf: Pass protection and flags bits through mmap2 interface") Link: http://lkml.kernel.org/r/20170126221508.GF6536@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6e440c4d17fe..b1cfd7416db0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6606,6 +6606,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char *buf = NULL; char *name; + if (vma->vm_flags & VM_READ) + prot |= PROT_READ; + if (vma->vm_flags & VM_WRITE) + prot |= PROT_WRITE; + if (vma->vm_flags & VM_EXEC) + prot |= PROT_EXEC; + + if (vma->vm_flags & VM_MAYSHARE) + flags = MAP_SHARED; + else + flags = MAP_PRIVATE; + + if (vma->vm_flags & VM_DENYWRITE) + flags |= MAP_DENYWRITE; + if (vma->vm_flags & VM_MAYEXEC) + flags |= MAP_EXECUTABLE; + if (vma->vm_flags & VM_LOCKED) + flags |= MAP_LOCKED; + if (vma->vm_flags & VM_HUGETLB) + flags |= MAP_HUGETLB; + if (file) { struct inode *inode; dev_t dev; @@ -6632,27 +6653,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) maj = MAJOR(dev); min = MINOR(dev); - if (vma->vm_flags & VM_READ) - prot |= PROT_READ; - if (vma->vm_flags & VM_WRITE) - prot |= PROT_WRITE; - if (vma->vm_flags & VM_EXEC) - prot |= PROT_EXEC; - - if (vma->vm_flags & VM_MAYSHARE) - flags = MAP_SHARED; - else - flags = MAP_PRIVATE; - - if (vma->vm_flags & VM_DENYWRITE) - flags |= MAP_DENYWRITE; - if (vma->vm_flags & VM_MAYEXEC) - flags |= MAP_EXECUTABLE; - if (vma->vm_flags & VM_LOCKED) - flags |= MAP_LOCKED; - if (vma->vm_flags & VM_HUGETLB) - flags |= MAP_HUGETLB; - goto got_name; } else { if (vma->vm_ops && vma->vm_ops->name) { -- GitLab From fc794153c4074dff6487a2d22f1a23246c329bdf Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 12 Dec 2016 23:13:27 +0530 Subject: [PATCH 1389/1442] ata: sata_mv:- Handle return value of devm_ioremap. commit 064c3db9c564cc5be514ac21fb4aa26cc33db746 upstream. Here, If devm_ioremap will fail. It will return NULL. Then hpriv->base = NULL - 0x20000; Kernel can run into a NULL-pointer dereference. This error check will avoid NULL pointer dereference. Signed-off-by: Arvind Yadav Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/sata_mv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 823e938c9a78..2f32782cea6d 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -4132,6 +4132,9 @@ static int mv_platform_probe(struct platform_device *pdev) host->iomap = NULL; hpriv->base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!hpriv->base) + return -ENOMEM; + hpriv->base -= SATAHC0_REG_BASE; hpriv->clk = clk_get(&pdev->dev, NULL); -- GitLab From 6d08607ef35bb8abeb0a2212fbdf85e0af4d5df1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2017 11:48:50 -0500 Subject: [PATCH 1390/1442] libata: apply MAX_SEC_1024 to all CX1-JB*-HP devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e0edc8c546463f268d41d064d855bcff994c52fa upstream. Marko reports that CX1-JB512-HP shows the same timeout issues as CX1-JB256-HP. Let's apply MAX_SEC_128 to all devices in the series. Signed-off-by: Tejun Heo Reported-by: Marko Koski-Vähälä Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 223a770f78f3..1a57de7e1ba4 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4316,10 +4316,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 }, /* - * Device times out with higher max sects. + * These devices time out with higher max sects. * https://bugzilla.kernel.org/show_bug.cgi?id=121671 */ - { "LITEON CX1-JB256-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + { "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, /* Devices we expect to fail diagnostics */ -- GitLab From 4b70d598c2ee1f47871ba12db832fcbbb4adb491 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 19 Dec 2016 10:17:40 +0900 Subject: [PATCH 1391/1442] libata: Fix ATA request sense commit 2dae99558e86894e9e5dbf097477baaa5eb70134 upstream. For an ATA device supporting the sense data reporting feature set, a failed command will trigger the execution of ata_eh_request_sense if the result task file of the failed command has the ATA_SENSE bit set (sense data available bit). ata_eh_request_sense executes the REQUEST SENSE DATA EXT command to retrieve the sense data of the failed command. On success of REQUEST SENSE DATA EXT, the ATA_SENSE bit will NOT be set (the command succeeded) but ata_eh_request_sense nevertheless tests the availability of sense data by testing that bit presence in the result tf of the REQUEST SENSE DATA EXT command. This leads us to falsely assume that request sense data failed and to the warning message: atax.xx: request sense failed stat 50 emask 0 Upon success of REQUEST SENSE DATA EXT, set the ATA_SENSE bit in the result task file command so that sense data can be returned by ata_eh_request_sense. Signed-off-by: Damien Le Moal Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1a57de7e1ba4..33e363dcc63b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1695,6 +1695,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, if (qc->err_mask & ~AC_ERR_OTHER) qc->err_mask &= ~AC_ERR_OTHER; + } else if (qc->tf.command == ATA_CMD_REQ_SENSE_DATA) { + qc->result_tf.command |= ATA_SENSE; } /* finish up */ -- GitLab From 73d45909780e7f8632cbc58971c84a67a8622c7a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Jan 2017 10:10:16 +1100 Subject: [PATCH 1392/1442] powerpc/eeh: Fix wrong flag passed to eeh_unfreeze_pe() commit f05fea5b3574a5926c53865eea27139bb40b2f2b upstream. In __eeh_clear_pe_frozen_state(), we should pass the flag's value instead of its address to eeh_unfreeze_pe(). The isolated flag is cleared if no error returned from __eeh_clear_pe_frozen_state(). We never observed the error from the function. So the isolated flag should have been always cleared, no real issue is caused because of the misused @flag. This fixes the code by passing the value of @flag to eeh_unfreeze_pe(). Fixes: 5cfb20b96f6 ("powerpc/eeh: Emulate EEH recovery for VFIO devices") Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 5c31369435f2..a5dd493670a0 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -545,7 +545,7 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - bool *clear_sw_state = flag; + bool clear_sw_state = *(bool *)flag; int i, rc = 1; for (i = 0; rc && i < 3; i++) -- GitLab From bbf69e5197daf5560fa37894e50d943b2065496c Mon Sep 17 00:00:00 2001 From: Darren Stevens Date: Mon, 23 Jan 2017 19:42:54 +0000 Subject: [PATCH 1393/1442] powerpc: Add missing error check to prom_find_boot_cpu() commit af2b7fa17eb92e52b65f96604448ff7a2a89ee99 upstream. prom_init.c calls 'instance-to-package' twice, but the return is not checked during prom_find_boot_cpu(). The result is then passed to prom_getprop(), which could be PROM_ERROR. Add a return check to prevent this. This was found on a pasemi system, where CFE doesn't have a working 'instance-to package' prom call. Before Commit 5c0484e25ec0 ('powerpc: Endian safe trampoline') the area around addr 0 was mostly 0's and this doesn't cause a problem. Once the macro 'FIXUP_ENDIAN' has been added to head_64.S, the low memory area now has non-zero values, which cause the prom_getprop() call to hang. mpe: Also confirmed that under SLOF if 'instance-to-package' did fail with PROM_ERROR we would crash in SLOF. So the bug is not specific to CFE, it's just that other open firmwares don't trigger it because they have a working 'instance-to-package'. Fixes: 5c0484e25ec0 ("powerpc: Endian safe trampoline") Signed-off-by: Darren Stevens Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/prom_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 88ac964f4858..1e8c57207346 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2747,6 +2747,9 @@ static void __init prom_find_boot_cpu(void) cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + if (!PHANDLE_VALID(cpu_pkg)) + return; + prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); -- GitLab From 8f415333be3a448d9db57a9142fde7efe863432d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Jan 2017 16:36:57 +1100 Subject: [PATCH 1394/1442] powerpc: Fix build failure with clang due to BUILD_BUG_ON() commit b5fa0f7f88edcde37df1807fdf9ff10ec787a60e upstream. Anton says: In commit 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()") and commit c12e6f24d413 ("powerpc: Add option to use jump label for mmu_has_feature()") we added: BUILD_BUG_ON(!__builtin_constant_p(feature)) to cpu_has_feature() and mmu_has_feature() in order to catch usage issues (such as cpu_has_feature(cpu_has_feature(X), which has happened once in the past). Unfortunately LLVM isn't smart enough to resolve this, and it errors out. I work around it in my clang/LLVM builds of the kernel, but I have just discovered that it causes a lot of issues for the bcc (eBPF) trace tool (which uses LLVM). For now just #ifdef it away for clang builds. Fixes: 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()") Fixes: c12e6f24d413 ("powerpc: Add option to use jump label for mmu_has_feature()") Reported-by: Anton Blanchard Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/cpu_has_feature.h | 2 ++ arch/powerpc/include/asm/mmu.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index b312b152461b..6e834caa3720 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -23,7 +23,9 @@ static __always_inline bool cpu_has_feature(unsigned long feature) { int i; +#ifndef __clang__ /* clang can't cope with this */ BUILD_BUG_ON(!__builtin_constant_p(feature)); +#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG if (!static_key_initialized) { diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e311c25751a4..a244e09d2d88 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -160,7 +160,9 @@ static __always_inline bool mmu_has_feature(unsigned long feature) { int i; +#ifndef __clang__ /* clang can't cope with this */ BUILD_BUG_ON(!__builtin_constant_p(feature)); +#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG if (!static_key_initialized) { -- GitLab From 4c953848c95b74def08172b07c04a773071f69ee Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Wed, 25 Jan 2017 09:54:33 -0600 Subject: [PATCH 1395/1442] powerpc/mm: Use the correct pointer when setting a 2MB pte commit a0615a16f7d0ceb5804d295203c302d496d8ee91 upstream. When setting a 2MB pte, radix__map_kernel_page() is using the address ptep = (pte_t *)pudp; Fix this conversion to use pmdp instead. Use pmdp_ptep() to do this instead of casting the pointer. Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines") Reviewed-by: Aneesh Kumar K.V Signed-off-by: Reza Arbab Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/pgtable-radix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ebb7f46f0532..9a25dce87875 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -65,7 +65,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, if (!pmdp) return -ENOMEM; if (map_page_size == PMD_SIZE) { - ptep = (pte_t *)pudp; + ptep = pmdp_ptep(pmdp); goto set_the_pte; } ptep = pte_alloc_kernel(pmdp, ea); @@ -90,7 +90,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } pmdp = pmd_offset(pudp, ea); if (map_page_size == PMD_SIZE) { - ptep = (pte_t *)pudp; + ptep = pmdp_ptep(pmdp); goto set_the_pte; } if (!pmd_present(*pmdp)) { -- GitLab From 743146d347f3141cd5a82f4c9aace1790a7537b9 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 18 Jan 2017 19:04:42 +0800 Subject: [PATCH 1396/1442] NFSD: Fix a null reference case in find_or_create_lock_stateid() commit d19fb70dd68c4e960e2ac09b0b9c79dfdeefa726 upstream. nfsd assigns the nfs4_free_lock_stateid to .sc_free in init_lock_stateid(). If nfsd doesn't go through init_lock_stateid() and put stateid at end, there is a NULL reference to .sc_free when calling nfs4_put_stid(ns). This patch let the nfs4_stid.sc_free assignment to nfs4_alloc_stid(). Fixes: 356a95ece7aa "nfsd: clean up races in lock stateid searching..." Signed-off-by: Kinglong Mee Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4layouts.c | 5 +++-- fs/nfsd/nfs4state.c | 19 ++++++++----------- fs/nfsd/state.h | 4 ++-- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 42aace4fc4c8..64813697f4c4 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -223,10 +223,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, struct nfs4_layout_stateid *ls; struct nfs4_stid *stp; - stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache); + stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache, + nfsd4_free_layout_stateid); if (!stp) return NULL; - stp->sc_free = nfsd4_free_layout_stateid; + get_nfs4_file(fp); stp->sc_file = fp; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4b4beaaa4eaa..a0dee8ae9f97 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -633,8 +633,8 @@ find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new) return co; } -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab) +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)) { struct nfs4_stid *stid; int new_id; @@ -650,6 +650,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, idr_preload_end(); if (new_id < 0) goto out_free; + + stid->sc_free = sc_free; stid->sc_client = cl; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; @@ -675,15 +677,12 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; - struct nfs4_ol_stateid *stp; - stid = nfs4_alloc_stid(clp, stateid_slab); + stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid); if (!stid) return NULL; - stp = openlockstateid(stid); - stp->st_stid.sc_free = nfs4_free_ol_stateid; - return stp; + return openlockstateid(stid); } static void nfs4_free_deleg(struct nfs4_stid *stid) @@ -781,11 +780,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, goto out_dec; if (delegation_blocked(¤t_fh->fh_handle)) goto out_dec; - dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); + dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) goto out_dec; - dp->dl_stid.sc_free = nfs4_free_deleg; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -5580,7 +5578,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; - stp->st_stid.sc_free = nfs4_free_lock_stateid; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -5623,7 +5620,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, lst = find_lock_stateid(lo, fi); if (lst == NULL) { spin_unlock(&clp->cl_lock); - ns = nfs4_alloc_stid(clp, stateid_slab); + ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); if (ns == NULL) return NULL; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index c9399366f9df..4516e8b7d776 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -603,8 +603,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab); +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); -- GitLab From a3d729526f2fd8b9a6ff9f6012f8344cadfd432c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 Jan 2017 11:37:50 -0500 Subject: [PATCH 1397/1442] svcrpc: fix oops in absence of krb5 module commit 034dd34ff4916ec1f8f74e39ca3efb04eab2f791 upstream. Olga Kornievskaia says: "I ran into this oops in the nfsd (below) (4.10-rc3 kernel). To trigger this I had a client (unsuccessfully) try to mount the server with krb5 where the server doesn't have the rpcsec_gss_krb5 module built." The problem is that rsci.cred is copied from a svc_cred structure that gss_proxy didn't properly initialize. Fix that. [120408.542387] general protection fault: 0000 [#1] SMP ... [120408.565724] CPU: 0 PID: 3601 Comm: nfsd Not tainted 4.10.0-rc3+ #16 [120408.567037] Hardware name: VMware, Inc. VMware Virtual = Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [120408.569225] task: ffff8800776f95c0 task.stack: ffffc90003d58000 [120408.570483] RIP: 0010:gss_mech_put+0xb/0x20 [auth_rpcgss] ... [120408.584946] ? rsc_free+0x55/0x90 [auth_rpcgss] [120408.585901] gss_proxy_save_rsc+0xb2/0x2a0 [auth_rpcgss] [120408.587017] svcauth_gss_proxy_init+0x3cc/0x520 [auth_rpcgss] [120408.588257] ? __enqueue_entity+0x6c/0x70 [120408.589101] svcauth_gss_accept+0x391/0xb90 [auth_rpcgss] [120408.590212] ? try_to_wake_up+0x4a/0x360 [120408.591036] ? wake_up_process+0x15/0x20 [120408.592093] ? svc_xprt_do_enqueue+0x12e/0x2d0 [sunrpc] [120408.593177] svc_authenticate+0xe1/0x100 [sunrpc] [120408.594168] svc_process_common+0x203/0x710 [sunrpc] [120408.595220] svc_process+0x105/0x1c0 [sunrpc] [120408.596278] nfsd+0xe9/0x160 [nfsd] [120408.597060] kthread+0x101/0x140 [120408.597734] ? nfsd_destroy+0x60/0x60 [nfsd] [120408.598626] ? kthread_park+0x90/0x90 [120408.599448] ret_from_fork+0x22/0x30 Fixes: 1d658336b05f "SUNRPC: Add RPC based upcall mechanism for RPCGSS auth" Cc: Simo Sorce Reported-by: Olga Kornievskaia Tested-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index dc6fb79a361f..25d9a9cf7b66 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -260,7 +260,7 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, if (!oa->data) return -ENOMEM; - creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL); + creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { kfree(oa->data); return -ENOMEM; -- GitLab From f0c3a0ac3349146259c235215f474de48b45bd89 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 3 Feb 2017 13:13:09 -0800 Subject: [PATCH 1398/1442] zswap: disable changing params if init fails commit d7b028f56a971a2e4d8d7887540a144eeefcd4ab upstream. Add zswap_init_failed bool that prevents changing any of the module params, if init_zswap() fails, and set zswap_enabled to false. Change 'enabled' param to a callback, and check zswap_init_failed before allowing any change to 'enabled', 'zpool', or 'compressor' params. Any driver that is built-in to the kernel will not be unloaded if its init function returns error, and its module params remain accessible for users to change via sysfs. Since zswap uses param callbacks, which assume that zswap has been initialized, changing the zswap params after a failed initialization will result in WARNING due to the param callbacks expecting a pool to already exist. This prevents that by immediately exiting any of the param callbacks if initialization failed. This was reported here: https://marc.info/?l=linux-mm&m=147004228125528&w=4 And fixes this WARNING: [ 429.723476] WARNING: CPU: 0 PID: 5140 at mm/zswap.c:503 __zswap_pool_current+0x56/0x60 The warning is just noise, and not serious. However, when init fails, zswap frees all its percpu dstmem pages and its kmem cache. The kmem cache might be serious, if kmem_cache_alloc(NULL, gfp) has problems; but the percpu dstmem pages are definitely a problem, as they're used as temporary buffer for compressed pages before copying into place in the zpool. If the user does get zswap enabled after an init failure, then zswap will likely Oops on the first page it tries to compress (or worse, start corrupting memory). Fixes: 90b0fc26d5db ("zswap: change zpool/compressor at runtime") Link: http://lkml.kernel.org/r/20170124200259.16191-2-ddstreet@ieee.org Signed-off-by: Dan Streetman Reported-by: Marcin Miroslaw Cc: Seth Jennings Cc: Michal Hocko Cc: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/mm/zswap.c b/mm/zswap.c index 275b22cc8df4..dbef27822a98 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -78,7 +78,13 @@ static u64 zswap_duplicate_entry; /* Enable/disable zswap (disabled by default) */ static bool zswap_enabled; -module_param_named(enabled, zswap_enabled, bool, 0644); +static int zswap_enabled_param_set(const char *, + const struct kernel_param *); +static struct kernel_param_ops zswap_enabled_param_ops = { + .set = zswap_enabled_param_set, + .get = param_get_bool, +}; +module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); /* Crypto compressor to use */ #define ZSWAP_COMPRESSOR_DEFAULT "lzo" @@ -176,6 +182,9 @@ static atomic_t zswap_pools_count = ATOMIC_INIT(0); /* used by param callback function */ static bool zswap_init_started; +/* fatal error during init */ +static bool zswap_init_failed; + /********************************* * helpers and fwd declarations **********************************/ @@ -706,6 +715,11 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, char *s = strstrip((char *)val); int ret; + if (zswap_init_failed) { + pr_err("can't set param, initialization failed\n"); + return -ENODEV; + } + /* no change required */ if (!strcmp(s, *(char **)kp->arg)) return 0; @@ -785,6 +799,17 @@ static int zswap_zpool_param_set(const char *val, return __zswap_param_set(val, kp, NULL, zswap_compressor); } +static int zswap_enabled_param_set(const char *val, + const struct kernel_param *kp) +{ + if (zswap_init_failed) { + pr_err("can't enable, initialization failed\n"); + return -ENODEV; + } + + return param_set_bool(val, kp); +} + /********************************* * writeback code **********************************/ @@ -1271,6 +1296,9 @@ static int __init init_zswap(void) dstmem_fail: zswap_entry_cache_destroy(); cache_fail: + /* if built-in, we aren't unloaded on failure; don't allow use */ + zswap_init_failed = true; + zswap_enabled = false; return -ENOMEM; } /* must be late so crypto has time to come up */ -- GitLab From 9e255997c2e169ba4bca92e6f84324581b28abbe Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 13 Jan 2017 15:00:16 +0100 Subject: [PATCH 1399/1442] cifs: initialize file_info_lock commit 81ddd8c0c5e1cb41184d66567140cb48c53eb3d1 upstream. Reviewed-by: Jeff Layton file_info_lock is not initalized in initiate_cifs_search(), leading to the following splat after a simple "mount.cifs ... dir && ls dir/": BUG: spinlock bad magic on CPU#0, ls/486 lock: 0xffff880009301110, .magic: 00000000, .owner: /-1, .owner_cpu: 0 CPU: 0 PID: 486 Comm: ls Not tainted 4.9.0 #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ffffc900042f3db0 ffffffff81327533 0000000000000000 ffff880009301110 ffffc900042f3dd0 ffffffff810baf75 ffff880009301110 ffffffff817ae077 ffffc900042f3df0 ffffffff810baff6 ffff880009301110 ffff880008d69900 Call Trace: [] dump_stack+0x65/0x92 [] spin_dump+0x85/0xe0 [] spin_bug+0x26/0x30 [] do_raw_spin_lock+0xe9/0x130 [] _raw_spin_lock+0x1f/0x30 [] cifs_closedir+0x4d/0x100 [] __fput+0x5d/0x160 [] ____fput+0xe/0x10 [] task_work_run+0x7e/0xa0 [] exit_to_usermode_loop+0x92/0xa0 [] syscall_return_slowpath+0x49/0x50 [] entry_SYSCALL_64_fastpath+0xa7/0xa9 Fixes: 3afca265b5f53a0 ("Clarify locking of cifs file and tcon structures and make more granular") Signed-off-by: Rabin Vincent Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/readdir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 8f6a2a5863b9..a27fc8791551 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -285,6 +285,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file) rc = -ENOMEM; goto error_exit; } + spin_lock_init(&cifsFile->file_info_lock); file->private_data = cifsFile; cifsFile->tlink = cifs_get_tlink(tlink); tcon = tlink_tcon(tlink); -- GitLab From 72f7419610c838abc5e3fde87835a5581e8e368c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 3 Feb 2017 13:13:20 -0800 Subject: [PATCH 1400/1442] mm/memory_hotplug.c: check start_pfn in test_pages_in_a_zone() commit deb88a2a19e85842d79ba96b05031739ec327ff4 upstream. Patch series "fix a kernel oops when reading sysfs valid_zones", v2. A sysfs memory file is created for each 2GiB memory block on x86-64 when the system has 64GiB or more memory. [1] When the start address of a memory block is not backed by struct page, i.e. a memory range is not aligned by 2GiB, reading its 'valid_zones' attribute file leads to a kernel oops. This issue was observed on multiple x86-64 systems with more than 64GiB of memory. This patch-set fixes this issue. Patch 1 first fixes an issue in test_pages_in_a_zone(), which does not test the start section. Patch 2 then fixes the kernel oops by extending test_pages_in_a_zone() to return valid [start, end). Note for stable kernels: The memory block size change was made by commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems"), which was accepted to 3.9. However, this patch-set depends on (and fixes) the change to test_pages_in_a_zone() made by commit 5f0f2887f4de ("mm/memory_hotplug.c: check for missing sections in test_pages_in_a_zone()"), which was accepted to 4.4. So, I recommend that we backport it up to 4.4. [1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems")' This patch (of 2): test_pages_in_a_zone() does not check 'start_pfn' when it is aligned by section since 'sec_end_pfn' is set equal to 'pfn'. Since this function is called for testing the range of a sysfs memory file, 'start_pfn' is always aligned by section. Fix it by properly setting 'sec_end_pfn' to the next section pfn. Also make sure that this function returns 1 only when the range belongs to a zone. Link: http://lkml.kernel.org/r/20170127222149.30893-2-toshi.kani@hpe.com Signed-off-by: Toshi Kani Cc: Andrew Banman Cc: Reza Arbab Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory_hotplug.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c3a8141ac788..3a330d7cae69 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1483,7 +1483,7 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) } /* - * Confirm all pages in a range [start, end) is belongs to the same zone. + * Confirm all pages in a range [start, end) belong to the same zone. */ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) { @@ -1491,9 +1491,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) struct zone *zone = NULL; struct page *page; int i; - for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn); + for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1); pfn < end_pfn; - pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) { + pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) { /* Make sure the memory section is present first */ if (!present_section_nr(pfn_to_section_nr(pfn))) continue; @@ -1512,7 +1512,11 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) zone = page_zone(page); } } - return 1; + + if (zone) + return 1; + else + return 0; } /* -- GitLab From 6cb0497aec810617388dfe674209cd417f509844 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 3 Feb 2017 13:13:23 -0800 Subject: [PATCH 1401/1442] base/memory, hotplug: fix a kernel oops in show_valid_zones() commit a96dfddbcc04336bbed50dc2b24823e45e09e80c upstream. Reading a sysfs "memoryN/valid_zones" file leads to the following oops when the first page of a range is not backed by struct page. show_valid_zones() assumes that 'start_pfn' is always valid for page_zone(). BUG: unable to handle kernel paging request at ffffea017a000000 IP: show_valid_zones+0x6f/0x160 This issue may happen on x86-64 systems with 64GiB or more memory since their memory block size is bumped up to 2GiB. [1] An example of such systems is desribed below. 0x3240000000 is only aligned by 1GiB and this memory block starts from 0x3200000000, which is not backed by struct page. BIOS-e820: [mem 0x0000003240000000-0x000000603fffffff] usable Since test_pages_in_a_zone() already checks holes, fix this issue by extending this function to return 'valid_start' and 'valid_end' for a given range. show_valid_zones() then proceeds with the valid range. [1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems")' Link: http://lkml.kernel.org/r/20170127222149.30893-3-toshi.kani@hpe.com Signed-off-by: Toshi Kani Cc: Greg Kroah-Hartman Cc: Zhang Zhen Cc: Reza Arbab Cc: David Rientjes Cc: Dan Williams Signed-off-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 12 ++++++------ include/linux/memory_hotplug.h | 3 ++- mm/memory_hotplug.c | 20 +++++++++++++++----- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index e7f86a8887d2..c5cdd190b781 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -391,33 +391,33 @@ static ssize_t show_valid_zones(struct device *dev, { struct memory_block *mem = to_memory_block(dev); unsigned long start_pfn, end_pfn; + unsigned long valid_start, valid_end, valid_pages; unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - struct page *first_page; struct zone *zone; int zone_shift = 0; start_pfn = section_nr_to_pfn(mem->start_section_nr); end_pfn = start_pfn + nr_pages; - first_page = pfn_to_page(start_pfn); /* The block contains more than one zone can not be offlined. */ - if (!test_pages_in_a_zone(start_pfn, end_pfn)) + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) return sprintf(buf, "none\n"); - zone = page_zone(first_page); + zone = page_zone(pfn_to_page(valid_start)); + valid_pages = valid_end - valid_start; /* MMOP_ONLINE_KEEP */ sprintf(buf, "%s", zone->name); /* MMOP_ONLINE_KERNEL */ - zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift); + zone_can_shift(valid_start, valid_pages, ZONE_NORMAL, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); } /* MMOP_ONLINE_MOVABLE */ - zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift); + zone_can_shift(valid_start, valid_pages, ZONE_MOVABLE, &zone_shift); if (zone_shift) { strcat(buf, " "); strcat(buf, (zone + zone_shift)->name); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c1784c0b4f35..134a2f69c21a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -85,7 +85,8 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); /* VM interface that may be used by firmware interface */ extern int online_pages(unsigned long, unsigned long, int); -extern int test_pages_in_a_zone(unsigned long, unsigned long); +extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end); extern void __offline_isolated_pages(unsigned long, unsigned long); typedef void (*online_page_callback_t)(struct page *page); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3a330d7cae69..ede137345a99 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1484,10 +1484,13 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) /* * Confirm all pages in a range [start, end) belong to the same zone. + * When true, return its valid [start, end). */ -int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) +int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end) { unsigned long pfn, sec_end_pfn; + unsigned long start, end; struct zone *zone = NULL; struct page *page; int i; @@ -1509,14 +1512,20 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; + if (!zone) + start = pfn + i; zone = page_zone(page); + end = pfn + MAX_ORDER_NR_PAGES; } } - if (zone) + if (zone) { + *valid_start = start; + *valid_end = end; return 1; - else + } else { return 0; + } } /* @@ -1863,6 +1872,7 @@ static int __ref __offline_pages(unsigned long start_pfn, long offlined_pages; int ret, drain, retry_max, node; unsigned long flags; + unsigned long valid_start, valid_end; struct zone *zone; struct memory_notify arg; @@ -1873,10 +1883,10 @@ static int __ref __offline_pages(unsigned long start_pfn, return -EINVAL; /* This makes hotplug much easier...and readable. we assume this for now. .*/ - if (!test_pages_in_a_zone(start_pfn, end_pfn)) + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) return -EINVAL; - zone = page_zone(pfn_to_page(start_pfn)); + zone = page_zone(pfn_to_page(valid_start)); node = zone_to_nid(zone); nr_pages = end_pfn - start_pfn; -- GitLab From b67c7d39bc284776c27eeaefd424046c742b0d93 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 3 Feb 2017 13:13:29 -0800 Subject: [PATCH 1402/1442] mm, fs: check for fatal signals in do_generic_file_read() commit 5abf186a30a89d5b9c18a6bf93a2c192c9fd52f6 upstream. do_generic_file_read() can be told to perform a large request from userspace. If the system is under OOM and the reading task is the OOM victim then it has an access to memory reserves and finishing the full request can lead to the full memory depletion which is dangerous. Make sure we rather go with a short read and allow the killed task to terminate. Link: http://lkml.kernel.org/r/20170201092706.9966-3-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Christoph Hellwig Cc: Tetsuo Handa Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index 779801092ef1..d8d7df82c69a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1703,6 +1703,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, cond_resched(); find_page: + if (fatal_signal_pending(current)) { + error = -EINTR; + goto out; + } + page = find_get_page(mapping, index); if (!page) { page_cache_sync_readahead(mapping, -- GitLab From a93ae8dccc3c723ed7a629dab37a3392387acd79 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 30 Jan 2017 19:27:10 -0500 Subject: [PATCH 1403/1442] tracing: Fix hwlat kthread migration commit 79c6f448c8b79c321e4a1f31f98194e4f6b6cae7 upstream. The hwlat tracer creates a kernel thread at start of the tracer. It is pinned to a single CPU and will move to the next CPU after each period of running. If the user modifies the migration thread's affinity, it will not change after that happens. The original code created the thread at the first instance it was called, but later was changed to destroy the thread after the tracer was finished, and would not be created until the next instance of the tracer was established. The code that initialized the affinity was only called on the initial instantiation of the tracer. After that, it was not initialized, and the previous affinity did not match the current newly created one, making it appear that the user modified the thread's affinity when it did not, and the thread failed to migrate again. Fixes: 0330f7aa8ee6 ("tracing: Have hwlat trace migrate across tracing_cpumask CPUs") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_hwlat.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index b97286c48735..f00b0131c8f9 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -266,7 +266,7 @@ static int get_sample(void) static struct cpumask save_cpumask; static bool disable_migrate; -static void move_to_next_cpu(void) +static void move_to_next_cpu(bool initmask) { static struct cpumask *current_mask; int next_cpu; @@ -275,7 +275,7 @@ static void move_to_next_cpu(void) return; /* Just pick the first CPU on first iteration */ - if (!current_mask) { + if (initmask) { current_mask = &save_cpumask; get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask); @@ -330,10 +330,12 @@ static void move_to_next_cpu(void) static int kthread_fn(void *data) { u64 interval; + bool initmask = true; while (!kthread_should_stop()) { - move_to_next_cpu(); + move_to_next_cpu(initmask); + initmask = false; local_irq_disable(); get_sample(); -- GitLab From a150e08704b24311a4d6215aade46691d6a7006a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 18 Jan 2017 21:30:51 +0100 Subject: [PATCH 1404/1442] can: bcm: fix hrtimer/tasklet termination in bcm op removal commit a06393ed03167771246c4c43192d9c264bc48412 upstream. When removing a bcm tx operation either a hrtimer or a tasklet might run. As the hrtimer triggers its associated tasklet and vice versa we need to take care to mutually terminate both handlers. Reported-by: Michael Josenhans Signed-off-by: Oliver Hartkopp Tested-by: Michael Josenhans Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/bcm.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 436a7537e6a9..5e9ed5ec2860 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -734,14 +734,23 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, static void bcm_remove_op(struct bcm_op *op) { - hrtimer_cancel(&op->timer); - hrtimer_cancel(&op->thrtimer); - - if (op->tsklet.func) - tasklet_kill(&op->tsklet); + if (op->tsklet.func) { + while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || + test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || + hrtimer_active(&op->timer)) { + hrtimer_cancel(&op->timer); + tasklet_kill(&op->tsklet); + } + } - if (op->thrtsklet.func) - tasklet_kill(&op->thrtsklet); + if (op->thrtsklet.func) { + while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || + test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || + hrtimer_active(&op->thrtimer)) { + hrtimer_cancel(&op->thrtimer); + tasklet_kill(&op->thrtsklet); + } + } if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); -- GitLab From 1d88791d5ed5dae4ff8cbbf6147e90ff95d92ba9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Jan 2017 16:47:28 -0500 Subject: [PATCH 1405/1442] cgroup: don't online subsystems before cgroup_name/path() are operational commit 07cd12945551b63ecb1a349d50a6d69d1d6feb4a upstream. While refactoring cgroup creation, a5bca2152036 ("cgroup: factor out cgroup_create() out of cgroup_mkdir()") incorrectly onlined subsystems before the new cgroup is associated with it kernfs_node. This is fine for cgroup proper but cgroup_name/path() depend on the associated kernfs_node and if a subsystem makes the new cgroup_subsys_state visible, which they're allowed to after onlining, it can lead to NULL dereference. The current code performs cgroup creation and subsystem onlining in cgroup_create() and cgroup_mkdir() makes the cgroup and subsystems visible afterwards. There's no reason to online the subsystems early and we can simply drop cgroup_apply_control_enable() call from cgroup_create() so that the subsystems are onlined and made visible at the same time. Signed-off-by: Tejun Heo Reported-by: Konstantin Khlebnikov Fixes: a5bca2152036 ("cgroup: factor out cgroup_create() out of cgroup_mkdir()") Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 85bc9beb046d..4e2f3de0e40b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5219,6 +5219,11 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, return ERR_PTR(err); } +/* + * The returned cgroup is fully initialized including its control mask, but + * it isn't associated with its kernfs_node and doesn't have the control + * mask applied. + */ static struct cgroup *cgroup_create(struct cgroup *parent) { struct cgroup_root *root = parent->root; @@ -5283,11 +5288,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgroup_propagate_control(cgrp); - /* @cgrp doesn't have dir yet so the following will only create csses */ - ret = cgroup_apply_control_enable(cgrp); - if (ret) - goto out_destroy; - return cgrp; out_cancel_ref: @@ -5295,9 +5295,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) out_free_cgrp: kfree(cgrp); return ERR_PTR(ret); -out_destroy: - cgroup_destroy_locked(cgrp); - return ERR_PTR(ret); } static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, -- GitLab From 04eb7db25bb1bcf4ee1631d1099fc03308929c12 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Jan 2017 12:23:42 -0200 Subject: [PATCH 1406/1442] mmc: sdhci: Ignore unexpected CARD_INT interrupts commit 161e6d44a5e2d3f85365cb717d60e363171b39e6 upstream. One of our kernelCI boxes hanged at boot because a faulty eSDHC device was triggering spurious CARD_INT interrupts for SD cards, causing CMD52 reads, which are not allowed for SD devices. This adds a sanity check to the interruption path, preventing that illegal command from getting sent if the CARD_INT interruption should be disabled. This quirk allows that particular machine to resume boot despite the faulty hardware, instead of getting hung dealing with thousands of mishandled interrupts. Suggested-by: Adrian Hunter Signed-off-by: Gabriel Krisman Bertazi Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index e1e274a0a34f..ba637ff8aa7e 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2719,7 +2719,8 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) if (intmask & SDHCI_INT_RETUNE) mmc_retune_needed(host->mmc); - if (intmask & SDHCI_INT_CARD_INT) { + if ((intmask & SDHCI_INT_CARD_INT) && + (host->ier & SDHCI_INT_CARD_INT)) { sdhci_enable_sdio_irq_nolock(host, false); host->thread_isr |= SDHCI_INT_CARD_INT; result = IRQ_WAKE_THREAD; -- GitLab From 1594edd9ea0d75ef106bffc23c2b07b509f3301c Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Mon, 30 Jan 2017 11:09:36 +0100 Subject: [PATCH 1407/1442] vhost: fix initialization for vq->is_le commit cda8bba0f99d25d2061c531113c14fa41effc3ae upstream. Currently, under certain circumstances vhost_init_is_le does just a part of the initialization job, and depends on vhost_reset_is_le being called too. For this reason vhost_vq_init_access used to call vhost_reset_is_le when vq->private_data is NULL. This is not only counter intuitive, but also real a problem because it breaks vhost_net. The bug was introduced to vhost_net with commit 2751c9882b94 ("vhost: cross-endian support for legacy devices"). The symptom is corruption of the vq's used.idx field (virtio) after VHOST_NET_SET_BACKEND was issued as a part of the vhost shutdown on a vq with pending descriptors. Let us make sure the outcome of vhost_init_is_le never depend on the state it is actually supposed to initialize, and fix virtio_net by removing the reset from vhost_vq_init_access. With the above, there is no reason for vhost_reset_is_le to do just half of the job. Let us make vhost_reset_is_le reinitialize is_le. Signed-off-by: Halil Pasic Reported-by: Michael A. Tebolt Reported-by: Dr. David Alan Gilbert Fixes: commit 2751c9882b94 ("vhost: cross-endian support for legacy devices") Signed-off-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Tested-by: Michael A. Tebolt Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c6f2d89c0e97..64613fbf5cf8 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -130,14 +130,14 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, static void vhost_init_is_le(struct vhost_virtqueue *vq) { - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) - vq->is_le = true; + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) + || virtio_legacy_is_little_endian(); } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ static void vhost_reset_is_le(struct vhost_virtqueue *vq) { - vq->is_le = virtio_legacy_is_little_endian(); + vhost_init_is_le(vq); } struct vhost_flush_struct { @@ -1713,10 +1713,8 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) int r; bool is_le = vq->is_le; - if (!vq->private_data) { - vhost_reset_is_le(vq); + if (!vq->private_data) return 0; - } vhost_init_is_le(vq); -- GitLab From 8ee8ff9e2652dc9e79230dfcb3c70d0efbcde493 Mon Sep 17 00:00:00 2001 From: Rask Ingemann Lambertsen Date: Sat, 21 Jan 2017 17:11:43 +0100 Subject: [PATCH 1408/1442] regulator: axp20x: AXP806: Fix dcdcb being set instead of dcdce commit d0e287a401d9acf67b75180b26e2d62b7d482652 upstream. A typo or copy-paste bug means that the register access intended for regulator dcdce goes to dcdcb instead. This patch corrects it. Fixes: 2ca342d391e3 (regulator: axp20x: Support AXP806 variant) Signed-off-by: Rask Ingemann Lambertsen Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/axp20x-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index e6a512ebeae2..a3ade9e4ef47 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -272,7 +272,7 @@ static const struct regulator_desc axp806_regulators[] = { 64, AXP806_DCDCD_V_CTRL, 0x3f, AXP806_PWR_OUT_CTRL1, BIT(3)), AXP_DESC(AXP806, DCDCE, "dcdce", "vine", 1100, 3400, 100, - AXP806_DCDCB_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(4)), + AXP806_DCDCE_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(4)), AXP_DESC(AXP806, ALDO1, "aldo1", "aldoin", 700, 3300, 100, AXP806_ALDO1_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(5)), AXP_DESC(AXP806, ALDO2, "aldo2", "aldoin", 700, 3400, 100, -- GitLab From 12f822d23deee45421bf65dc9f5ff0fdcc783701 Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Sat, 28 Jan 2017 06:42:20 -0600 Subject: [PATCH 1409/1442] percpu-refcount: fix reference leak during percpu-atomic transition commit 966d2b04e070bc040319aaebfec09e0144dc3341 upstream. percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put(). This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work). Sample stack trace: __switch_to+0x2c0/0x450 __schedule+0x2f8/0x970 schedule+0x48/0xc0 blk_mq_freeze_queue_wait+0x94/0x120 blk_mq_queue_reinit_work+0xb8/0x180 blk_mq_queue_reinit_prepare+0x84/0xa0 cpuhp_invoke_callback+0x17c/0x600 cpuhp_up_callbacks+0x58/0x150 _cpu_up+0xf0/0x1c0 do_cpu_up+0x120/0x150 cpu_subsys_online+0x64/0xe0 device_online+0xb4/0x120 online_store+0xb4/0xc0 dev_attr_store+0x68/0xa0 sysfs_kf_write+0x80/0xb0 kernfs_fop_write+0x17c/0x250 __vfs_write+0x6c/0x1e0 vfs_write+0xd0/0x270 SyS_write+0x6c/0x110 system_call+0x38/0xe0 Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set. The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to a int. Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751 Signed-off-by: Douglas Miller Reviewed-by: Jens Axboe Signed-off-by: Tejun Heo Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints") Signed-off-by: Greg Kroah-Hartman --- include/linux/percpu-refcount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 1c7eec09e5eb..3a481a49546e 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -204,7 +204,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) static inline bool percpu_ref_tryget(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - int ret; + bool ret; rcu_read_lock_sched(); @@ -238,7 +238,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - int ret = false; + bool ret = false; rcu_read_lock_sched(); -- GitLab From 16f61dee7e7cb378216ff59ffeacf157efad6c82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 13 Jan 2017 12:23:35 +0100 Subject: [PATCH 1410/1442] Revert "bcma: init serial console directly from ChipCommon code" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7195439d1d71bc4a6c33cfb57bc669a7cd041041 upstream. This reverts commit 4c81acab3816 ("bcma: init serial console directly from ChipCommon code") as it broke IRQ assignment. Getting IRQ with bcma_core_irq helper on SoC requires MIPS core to be set. It happens *after* ChipCommon initialization so we can't do this so early. This fixes a user reported regression. It wasn't critical as serial was still somehow working but lack of IRQs was making in unreliable. Fixes: 4c81acab3816 ("bcma: init serial console directly from ChipCommon code") Reported-by: Felix Fietkau Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/bcma/bcma_private.h | 3 +++ drivers/bcma/driver_chipcommon.c | 11 +++-------- drivers/bcma/driver_mips.c | 3 +++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index f642c4264c27..168fa175d65a 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -45,6 +45,9 @@ int bcma_sprom_get(struct bcma_bus *bus); void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); +#ifdef CONFIG_BCMA_DRIVER_MIPS +void bcma_chipco_serial_init(struct bcma_drv_cc *cc); +#endif /* CONFIG_BCMA_DRIVER_MIPS */ /* driver_chipcommon_b.c */ int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb); diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index b4f6520e74f0..62f5bfa5065d 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -15,8 +15,6 @@ #include #include -static void bcma_chipco_serial_init(struct bcma_drv_cc *cc); - static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset, u32 mask, u32 value) { @@ -186,9 +184,6 @@ void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc) if (cc->capabilities & BCMA_CC_CAP_PMU) bcma_pmu_early_init(cc); - if (IS_BUILTIN(CONFIG_BCM47XX) && bus->hosttype == BCMA_HOSTTYPE_SOC) - bcma_chipco_serial_init(cc); - if (bus->hosttype == BCMA_HOSTTYPE_SOC) bcma_core_chipcommon_flash_detect(cc); @@ -378,9 +373,9 @@ u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value) return res; } -static void bcma_chipco_serial_init(struct bcma_drv_cc *cc) +#ifdef CONFIG_BCMA_DRIVER_MIPS +void bcma_chipco_serial_init(struct bcma_drv_cc *cc) { -#if IS_BUILTIN(CONFIG_BCM47XX) unsigned int irq; u32 baud_base; u32 i; @@ -422,5 +417,5 @@ static void bcma_chipco_serial_init(struct bcma_drv_cc *cc) ports[i].baud_base = baud_base; ports[i].reg_shift = 0; } -#endif /* CONFIG_BCM47XX */ } +#endif /* CONFIG_BCMA_DRIVER_MIPS */ diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 96f171328200..89af807cf29c 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -278,9 +278,12 @@ static void bcma_core_mips_nvram_init(struct bcma_drv_mips *mcore) void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { + struct bcma_bus *bus = mcore->core->bus; + if (mcore->early_setup_done) return; + bcma_chipco_serial_init(&bus->drv_cc); bcma_core_mips_nvram_init(mcore); mcore->early_setup_done = true; -- GitLab From 449d3ecfbd7640dc763dbf333131858ead6c3f11 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 3 Feb 2017 05:43:52 +0200 Subject: [PATCH 1411/1442] Revert "vring: Force use of DMA API for ARM-based systems with legacy devices" commit 0d5415b489f68b58e1983a53793d25d53098ed4b upstream. This reverts commit c7070619f3408d9a0dffbed9149e6f00479cf43b. This has been shown to regress on some ARM systems: by forcing on DMA API usage for ARM systems, we have inadvertently kicked open a hornets' nest in terms of cache-coherency. Namely that unless the virtio device is explicitly described as capable of coherent DMA by firmware, the DMA APIs on ARM and other DT-based platforms will assume it is non-coherent. This turns out to cause a big problem for the likes of QEMU and kvmtool, which generate virtio-mmio devices in their guest DTs but neglect to add the often-overlooked "dma-coherent" property; as a result, we end up with the guest making non-cacheable accesses to the vring, the host doing so cacheably, both talking past each other and things going horribly wrong. We are working on a safer work-around. Fixes: c7070619f340 ("vring: Force use of DMA API for ARM-based systems with legacy devices") Reported-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Michael S. Tsirkin Acked-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_ring.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index f1360487a594..489bfc61cf30 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -159,13 +159,6 @@ static bool vring_use_dma_api(struct virtio_device *vdev) if (xen_domain()) return true; - /* - * On ARM-based machines, the DMA ops will do the right thing, - * so always use them with legacy devices. - */ - if (IS_ENABLED(CONFIG_ARM) || IS_ENABLED(CONFIG_ARM64)) - return !virtio_has_feature(vdev, VIRTIO_F_VERSION_1); - return false; } -- GitLab From 2cf6c49264e40e395e73fef11f487b10b05548b4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 26 Jan 2017 19:24:08 +0200 Subject: [PATCH 1412/1442] pinctrl: baytrail: Debounce register is one per community commit 1b89970d81bbd52720fc64a3fe9572ee33588363 upstream. Debounce value is set globally per community. Otherwise user will easily get a kernel crash when they start using the feature: BUG: unable to handle kernel paging request at ffffc900003be000 IP: byt_gpio_dbg_show+0xa9/0x430 Make it clear in byt_gpio_reg(). Note that this fix just prevents kernel to crash, but doesn't make any difference to the existing logic. It means the last caller will win the trade and debounce value will be configured accordingly. The actual logic fix needs to be thought about and it's not as important as crash fix. That's why the latter goes separately and right now. Fixes: 658b476c742f ("pinctrl: baytrail: Add debounce configuration") Cc: Cristina Ciocan Signed-off-by: Andy Shevchenko Reviewed-by: Jean Delvare Acked-by: Mika Westerberg Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-baytrail.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 7f2263d61063..583ae3f38fc0 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -731,16 +731,23 @@ static void __iomem *byt_gpio_reg(struct byt_gpio *vg, unsigned int offset, int reg) { struct byt_community *comm = byt_get_community(vg, offset); - u32 reg_offset = 0; + u32 reg_offset; if (!comm) return NULL; offset -= comm->pin_base; - if (reg == BYT_INT_STAT_REG) + switch (reg) { + case BYT_INT_STAT_REG: reg_offset = (offset / 32) * 4; - else + break; + case BYT_DEBOUNCE_REG: + reg_offset = 0; + break; + default: reg_offset = comm->pad_map[offset] * 16; + break; + } return comm->reg_base + reg_offset + reg; } -- GitLab From 3d8ec7d2d5f9fdb83617f044ad9bcfa3cf6830b9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 24 Jan 2017 17:28:22 +0200 Subject: [PATCH 1413/1442] pinctrl: intel: merrifield: Add missed check in mrfld_config_set() commit 19b26d92dfb70f56440c187a20c49102ab648b97 upstream. Not every pin can be configured. Add missed check to prevent access violation. Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support") Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/intel/pinctrl-merrifield.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index 7826c7f0cb7c..9931be6af0ca 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -794,6 +794,9 @@ static int mrfld_config_set(struct pinctrl_dev *pctldev, unsigned int pin, unsigned int i; int ret; + if (!mrfld_buf_available(mp, pin)) + return -ENOTSUPP; + for (i = 0; i < nconfigs; i++) { switch (pinconf_to_config_param(configs[i])) { case PIN_CONFIG_BIAS_DISABLE: -- GitLab From 97663735a96f0cd7c851bbb8f08cff66d617220c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Mon, 10 Oct 2016 18:30:01 +0200 Subject: [PATCH 1414/1442] iwlwifi: fix double hyphen in MODULE_FIRMWARE for 8000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7941c59e45f3b6d30e07375e9b6713427e0a9f98 upstream. Mistakenly, the driver is trying to load the 8000C firmware with an incorrect name (i.e. with two hyphens where there should be only one) and that fails. Fix that by removing the hyphen from the format macro. Fixes: e1ba684f762b ("iwlwifi: 8000: fix MODULE_FIRMWARE input") Signed-off-by: Jürg Billeter Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index d02ca1491d16..8d3e53fac1da 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -91,7 +91,7 @@ #define IWL8000_FW_PRE "iwlwifi-8000C-" #define IWL8000_MODULE_FIRMWARE(api) \ - IWL8000_FW_PRE "-" __stringify(api) ".ucode" + IWL8000_FW_PRE __stringify(api) ".ucode" #define IWL8265_FW_PRE "iwlwifi-8265-" #define IWL8265_MODULE_FIRMWARE(api) \ -- GitLab From 40add19d3897dda97086ec7a73cf4e8daf7c00ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Dec 2016 12:03:36 +0100 Subject: [PATCH 1415/1442] iwlwifi: mvm: avoid crash on restart w/o reserved queues commit 03c902bff524e0cf664737a33f2365f7837040bf upstream. When the firmware restarts in a situation in which any station has no queue reserved anymore because that queue was used, the code will crash trying to access the queue_info array at the offset 255, which is far too big. Fix this by checking that a queue is actually reserved before writing its status. Fixes: 8d98ae6eb0d5 ("iwlwifi: mvm: re-assign old queues after hw restart in dqa mode") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index fc771885e383..52de3c6d760c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1144,9 +1144,10 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, .frame_limit = IWL_FRAME_LIMIT, }; - /* Make sure reserved queue is still marked as such (or allocated) */ - mvm->queue_info[mvm_sta->reserved_queue].status = - IWL_MVM_QUEUE_RESERVED; + /* Make sure reserved queue is still marked as such (if allocated) */ + if (mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE) + mvm->queue_info[mvm_sta->reserved_queue].status = + IWL_MVM_QUEUE_RESERVED; for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i]; -- GitLab From 80246551c7d7085f177b52020cd54f8d44c86728 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 26 Jan 2017 17:34:40 +0000 Subject: [PATCH 1416/1442] HID: usbhid: Quirk a AMI virtual mouse and keyboard with ALWAYS_POLL commit ed9ab4287f96e66340e0390e2c583f2f9110cba0 upstream. Quirking the following AMI USB device with ALWAYS_POLL fixes an AMI virtual keyboard and mouse from not responding and timing out when it is attached to a ppc64el Power 8 system and when we have some rapid open/closes on the mouse device. usb 1-3: new high-speed USB device number 2 using xhci_hcd usb 1-3: New USB device found, idVendor=046b, idProduct=ff01 usb 1-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-3: Product: Virtual Hub usb 1-3: Manufacturer: American Megatrends Inc. usb 1-3: SerialNumber: serial usb 1-3.3: new high-speed USB device number 3 using xhci_hcd usb 1-3.3: New USB device found, idVendor=046b, idProduct=ff31 usb 1-3.3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-3.3: Product: Virtual HardDisk Device usb 1-3.3: Manufacturer: American Megatrends Inc. usb 1-3.4: new low-speed USB device number 4 using xhci_hcd usb 1-3.4: New USB device found, idVendor=046b, idProduct=ff10 usb 1-3.4: New USB device strings: Mfr=1, Product=2, SerialNumber=0 usb 1-3.4: Product: Virtual Keyboard and Mouse usb 1-3.4: Manufacturer: American Megatrends Inc. With the quirk I have not been able to trigger the issue with half an hour of saturation soak testing. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 575aa65436d1..9845189fae92 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -76,6 +76,9 @@ #define USB_VENDOR_ID_ALPS_JP 0x044E #define HID_DEVICE_ID_ALPS_U1_DUAL 0x120B +#define USB_VENDOR_ID_AMI 0x046b +#define USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE 0xff10 + #define USB_VENDOR_ID_ANTON 0x1130 #define USB_DEVICE_ID_ANTON_TOUCH_PAD 0x3101 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index e6cfd323babc..cde060fefa91 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -57,6 +57,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS124U, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET }, -- GitLab From f24bc920612ccc68edb8ff4ee6e117a3e0a52304 Mon Sep 17 00:00:00 2001 From: Ardinartsev Nikita Date: Thu, 26 Jan 2017 16:54:42 +0300 Subject: [PATCH 1417/1442] HID: hid-lg: Fix immediate disconnection of Logitech Rumblepad 2 commit 877a021e08ccb6434718c0cc781fdf943c884cc0 upstream. With NOGET quirk Logitech F510 is now fully workable in dinput mode including rumble effects (according to fftest). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117091 [jkosina@suse.cz: fix patch format] Signed-off-by: Ardinartsev Nikita Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-lg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index c5c5fbe9d605..52026dc94d5c 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -872,7 +872,7 @@ static const struct hid_device_id lg_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG), .driver_data = LG_NOGET | LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2), - .driver_data = LG_FF2 }, + .driver_data = LG_NOGET | LG_FF2 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FLIGHT_SYSTEM_G940), .driver_data = LG_FF3 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR), -- GitLab From e6bd712154aaf121bff1791431854886f5983712 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Thu, 26 Jan 2017 09:06:22 -0800 Subject: [PATCH 1418/1442] HID: wacom: Fix poor prox handling in 'wacom_pl_irq' commit 282e4637bc1c0b338708bcebd09d31c69abec070 upstream. Commit 025bcc1 performed cleanup work on the 'wacom_pl_irq' function, making it follow the standards used in the rest of the codebase. The change unintiontionally allowed the function to send input events from reports that are not marked as being in prox. This can cause problems as the report values for X, Y, etc. are not guaranteed to be correct. In particular, occasionally the tablet will send a report with these values set to zero. If such a report is received it can caus an unexpected jump in the XY position. This patch surrounds more of the processing code with a proximity check, preventing these zeroed reports from overwriting the current state. To be safe, only the tool type and ABS_MISC events should be reported when the pen is marked as being out of prox. Fixes: 025bcc1540 ("HID: wacom: Simplify 'wacom_pl_irq'") Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1cb79925730d..623be90704ab 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -164,19 +164,21 @@ static int wacom_pl_irq(struct wacom_wac *wacom) wacom->id[0] = STYLUS_DEVICE_ID; } - pressure = (signed char)((data[7] << 1) | ((data[4] >> 2) & 1)); - if (features->pressure_max > 255) - pressure = (pressure << 1) | ((data[4] >> 6) & 1); - pressure += (features->pressure_max + 1) / 2; - - input_report_abs(input, ABS_X, data[3] | (data[2] << 7) | ((data[1] & 0x03) << 14)); - input_report_abs(input, ABS_Y, data[6] | (data[5] << 7) | ((data[4] & 0x03) << 14)); - input_report_abs(input, ABS_PRESSURE, pressure); - - input_report_key(input, BTN_TOUCH, data[4] & 0x08); - input_report_key(input, BTN_STYLUS, data[4] & 0x10); - /* Only allow the stylus2 button to be reported for the pen tool. */ - input_report_key(input, BTN_STYLUS2, (wacom->tool[0] == BTN_TOOL_PEN) && (data[4] & 0x20)); + if (prox) { + pressure = (signed char)((data[7] << 1) | ((data[4] >> 2) & 1)); + if (features->pressure_max > 255) + pressure = (pressure << 1) | ((data[4] >> 6) & 1); + pressure += (features->pressure_max + 1) / 2; + + input_report_abs(input, ABS_X, data[3] | (data[2] << 7) | ((data[1] & 0x03) << 14)); + input_report_abs(input, ABS_Y, data[6] | (data[5] << 7) | ((data[4] & 0x03) << 14)); + input_report_abs(input, ABS_PRESSURE, pressure); + + input_report_key(input, BTN_TOUCH, data[4] & 0x08); + input_report_key(input, BTN_STYLUS, data[4] & 0x10); + /* Only allow the stylus2 button to be reported for the pen tool. */ + input_report_key(input, BTN_STYLUS2, (wacom->tool[0] == BTN_TOOL_PEN) && (data[4] & 0x20)); + } if (!prox) wacom->id[0] = 0; -- GitLab From bebb9d75e84d38c7a10fcd4619fb1554a46f4715 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:39 +0100 Subject: [PATCH 1419/1442] perf/x86/intel/uncore: Clean up hotplug conversion fallout commit 1aa6cfd33df492939b0be15ebdbcff1f8ae5ddb6 upstream. The recent conversion to the hotplug state machine kept two mechanisms from the original code: 1) The first_init logic which adds the number of online CPUs in a package to the refcount. That's wrong because the callbacks are executed for all online CPUs. Remove it so the refcounting is correct. 2) The on_each_cpu() call to undo box->init() in the error handling path. That's bogus because when the prepare callback fails no box has been initialized yet. Remove it. Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Cc: Yasuaki Ishimatsu Fixes: 1a246b9f58c6 ("perf/x86/intel/uncore: Convert to hotplug state machine") Link: http://lkml.kernel.org/r/20170131230141.298032324@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/uncore.c | 44 ++++------------------------------ 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index dbaaf7dc8373..19d646a783fd 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -763,30 +763,6 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) pmu->registered = false; } -static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) -{ - struct intel_uncore_pmu *pmu = type->pmus; - struct intel_uncore_box *box; - int i, pkg; - - if (pmu) { - pkg = topology_physical_package_id(cpu); - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box) - uncore_box_exit(box); - } - } -} - -static void uncore_exit_boxes(void *dummy) -{ - struct intel_uncore_type **types; - - for (types = uncore_msr_uncores; *types; types++) - __uncore_exit_boxes(*types++, smp_processor_id()); -} - static void uncore_free_boxes(struct intel_uncore_pmu *pmu) { int pkg; @@ -1077,22 +1053,12 @@ static int uncore_cpu_dying(unsigned int cpu) return 0; } -static int first_init; - static int uncore_cpu_starting(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, pkg, ncpus = 1; - - if (first_init) { - /* - * On init we get the number of online cpus in the package - * and set refcount for all of them. - */ - ncpus = cpumask_weight(topology_core_cpumask(cpu)); - } + int i, pkg; pkg = topology_logical_package_id(cpu); for (; *types; types++) { @@ -1103,7 +1069,7 @@ static int uncore_cpu_starting(unsigned int cpu) if (!box) continue; /* The first cpu on a package activates the box */ - if (atomic_add_return(ncpus, &box->refcnt) == ncpus) + if (atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } @@ -1407,19 +1373,17 @@ static int __init intel_uncore_init(void) "PERF_X86_UNCORE_PREP", uncore_cpu_prepare, NULL); } - first_init = 1; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, "AP_PERF_X86_UNCORE_STARTING", uncore_cpu_starting, uncore_cpu_dying); - first_init = 0; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, "AP_PERF_X86_UNCORE_ONLINE", uncore_event_cpu_online, uncore_event_cpu_offline); return 0; err: - /* Undo box->init_box() */ - on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); return ret; -- GitLab From 2c2e7fe7c9c5524914772b2d44febbf5b98546d3 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 19 Jan 2017 08:49:07 -0800 Subject: [PATCH 1420/1442] dmaengine: cppi41: Fix runtime PM timeouts with USB mass storage commit ae4a3e028bb8b59e7cfeb0cc9ef03d885182ce8b upstream. Commit fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") added runtime PM support for cppi41, but had corner case issues. Some of the issues were fixed with commit 098de42ad670 ("dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected"). That fix however caused a new regression where we can get error -115 messages with USB on BeagleBone when connecting a USB mass storage device to a hub. This is because when connecting a USB mass storage device to a hub, the initial DMA transfers can take over 200ms to complete and cppi41 autosuspend delay times out. To fix the issue, we want to implement refcounting for chan_busy array that contains the active dma transfers. Increasing the autosuspend delay won't help as that the delay could be potentially seconds, and it's best to let the USB subsystem to deal with the timeouts on errors. The earlier attempt for runtime PM was buggy as the pm_runtime_get/put() calls could get unpaired easily as they did not follow the state of the chan_busy array as described in commit 098de42ad670 ("dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected". Let's fix the issue by adding pm_runtime_get() to where a new transfer is added to the chan_busy array, and calls to pm_runtime_put() where chan_busy array entry is cleared. This prevents any autosuspend timeouts from happening while dma transfers are active. Fixes: 098de42ad670 ("dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected") Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Cc: Andy Shevchenko Cc: Bin Liu Cc: Grygorii Strashko Cc: Kevin Hilman Cc: Patrick Titiano Cc: Sergei Shtylyov Signed-off-by: Tony Lindgren Tested-by: Bin Liu Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/cppi41.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index d5ba43a87a68..7de4fdf86a6a 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -257,6 +257,10 @@ static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc) BUG_ON(desc_num >= ALLOC_DECS_NUM); c = cdd->chan_busy[desc_num]; cdd->chan_busy[desc_num] = NULL; + + /* Usecount for chan_busy[], paired with push_desc_queue() */ + pm_runtime_put(cdd->ddev.dev); + return c; } @@ -447,6 +451,15 @@ static void push_desc_queue(struct cppi41_channel *c) */ __iowmb(); + /* + * DMA transfers can take at least 200ms to complete with USB mass + * storage connected. To prevent autosuspend timeouts, we must use + * pm_runtime_get/put() when chan_busy[] is modified. This will get + * cleared in desc_to_chan() or cppi41_stop_chan() depending on the + * outcome of the transfer. + */ + pm_runtime_get(cdd->ddev.dev); + desc_phys = lower_32_bits(c->desc_phys); desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); WARN_ON(cdd->chan_busy[desc_num]); @@ -705,6 +718,9 @@ static int cppi41_stop_chan(struct dma_chan *chan) WARN_ON(!cdd->chan_busy[desc_num]); cdd->chan_busy[desc_num] = NULL; + /* Usecount for chan_busy[], paired with push_desc_queue() */ + pm_runtime_put(cdd->ddev.dev); + return 0; } -- GitLab From bc05a2e940fe96fbacf879e73139d8a66c39ab8e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 19 Jan 2017 08:49:08 -0800 Subject: [PATCH 1421/1442] dmaengine: cppi41: Fix oops in cppi41_runtime_resume commit 362f4562466c3b9490e733e06999025638310d4a upstream. Commit fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") together with recent MUSB changes allowed USB and DMA on BeagleBone to idle when no cable is connected. But looks like few corner case issues still remain. Looks like just by re-plugging USB cable about ten or so times on BeagleBone when configured in USB peripheral mode we can get warnings and eventually trigger an oops in cppi41 DMA: WARNING: CPU: 0 PID: 14 at drivers/dma/cppi41.c:1154 cppi41_runtime_suspend+ x28/0x38 [cppi41] ... WARNING: CPU: 0 PID: 14 at drivers/dma/cppi41.c:452 push_desc_queue+0x94/0x9c [cppi41] ... Unable to handle kernel NULL pointer dereference at virtual address 00000104 pgd = c0004000 [00000104] *pgd=00000000 Internal error: Oops: 805 [#1] SMP ARM ... [] (cppi41_runtime_resume [cppi41]) from [] (__rpm_callback+0xc0/0x214) [] (__rpm_callback) from [] (rpm_callback+0x20/0x80) [] (rpm_callback) from [] (rpm_resume+0x504/0x78c) [] (rpm_resume) from [] (pm_runtime_work+0x60/0xa8) [] (pm_runtime_work) from [] (process_one_work+0x2b4/0x808) This is because of a race with runtime PM and cppi41_dma_issue_pending() as reported by Alexandre Bailon in earlier set of patches. Based on mailing list discussions we however came to the conclusion that a different fix from Alexandre's fix is needed in order to guarantee that DMA is really active when we try to use it. To fix the issue, we need to add a driver specific flag as we otherwise can have -EINPROGRESS state set by runtime PM and can't rely on pm_runtime_active() to tell us when we can use the DMA. And we need to make sure the DMA transfers get triggered in the queued order. So let's always queue the transfers, then flush the queue from both cppi41_dma_issue_pending() and cppi41_runtime_resume() as suggested by Grygorii Strashko in an earlier example patch. For reference, this is also documented in Documentation/power/runtime_pm.txt in the example at the end of the file as pointed out by Grygorii Strashko . Based on earlier patches from Alexandre Bailon and Grygorii Strashko modified based on testing and what was discussed on the mailing lists. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Cc: Andy Shevchenko Cc: Bin Liu Cc: Grygorii Strashko Cc: Kevin Hilman Cc: Patrick Titiano Cc: Sergei Shtylyov Reported-by: Alexandre Bailon Signed-off-by: Tony Lindgren Tested-by: Bin Liu Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/cppi41.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 7de4fdf86a6a..55c1782e3623 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -153,6 +153,8 @@ struct cppi41_dd { /* context for suspend/resume */ unsigned int dma_tdfdq; + + bool is_suspended; }; #define FIST_COMPLETION_QUEUE 93 @@ -470,20 +472,26 @@ static void push_desc_queue(struct cppi41_channel *c) cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num)); } -static void pending_desc(struct cppi41_channel *c) +/* + * Caller must hold cdd->lock to prevent push_desc_queue() + * getting called out of order. We have both cppi41_dma_issue_pending() + * and cppi41_runtime_resume() call this function. + */ +static void cppi41_run_queue(struct cppi41_dd *cdd) { - struct cppi41_dd *cdd = c->cdd; - unsigned long flags; + struct cppi41_channel *c, *_c; - spin_lock_irqsave(&cdd->lock, flags); - list_add_tail(&c->node, &cdd->pending); - spin_unlock_irqrestore(&cdd->lock, flags); + list_for_each_entry_safe(c, _c, &cdd->pending, node) { + push_desc_queue(c); + list_del(&c->node); + } } static void cppi41_dma_issue_pending(struct dma_chan *chan) { struct cppi41_channel *c = to_cpp41_chan(chan); struct cppi41_dd *cdd = c->cdd; + unsigned long flags; int error; error = pm_runtime_get(cdd->ddev.dev); @@ -495,10 +503,11 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) return; } - if (likely(pm_runtime_active(cdd->ddev.dev))) - push_desc_queue(c); - else - pending_desc(c); + spin_lock_irqsave(&cdd->lock, flags); + list_add_tail(&c->node, &cdd->pending); + if (!cdd->is_suspended) + cppi41_run_queue(cdd); + spin_unlock_irqrestore(&cdd->lock, flags); pm_runtime_mark_last_busy(cdd->ddev.dev); pm_runtime_put_autosuspend(cdd->ddev.dev); @@ -1166,8 +1175,12 @@ static int __maybe_unused cppi41_resume(struct device *dev) static int __maybe_unused cppi41_runtime_suspend(struct device *dev) { struct cppi41_dd *cdd = dev_get_drvdata(dev); + unsigned long flags; + spin_lock_irqsave(&cdd->lock, flags); + cdd->is_suspended = true; WARN_ON(!list_empty(&cdd->pending)); + spin_unlock_irqrestore(&cdd->lock, flags); return 0; } @@ -1175,14 +1188,11 @@ static int __maybe_unused cppi41_runtime_suspend(struct device *dev) static int __maybe_unused cppi41_runtime_resume(struct device *dev) { struct cppi41_dd *cdd = dev_get_drvdata(dev); - struct cppi41_channel *c, *_c; unsigned long flags; spin_lock_irqsave(&cdd->lock, flags); - list_for_each_entry_safe(c, _c, &cdd->pending, node) { - push_desc_queue(c); - list_del(&c->node); - } + cdd->is_suspended = false; + cppi41_run_queue(cdd); spin_unlock_irqrestore(&cdd->lock, flags); return 0; -- GitLab From 0dcbd0aa44e6b2aa725175dd5933cb706fb1e2c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 1 Feb 2017 14:19:53 +0100 Subject: [PATCH 1422/1442] KVM: x86: do not save guest-unsupported XSAVE state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 00c87e9a70a17b355b81c36adedf05e84f54e10d upstream. Saving unsupported state prevents migration when the new host does not support a XSAVE feature of the original host, even if the feature is not exposed to the guest. We've masked host features with guest-visible features before, with 4344ee981e21 ("KVM: x86: only copy XSAVE state for the supported features") and dropped it when implementing XSAVES. Do it again. Fixes: df1daba7d1cb ("KVM: x86: support XSAVES usage in the host") Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 487b957e7802..731044efb195 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3148,6 +3148,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) memcpy(dest, xsave, XSAVE_HDR_OFFSET); /* Set XSTATE_BV */ + xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE; *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; /* -- GitLab From 8bc382a9652bc17277caf2c3d9dad6d0b55bb7db Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 18 Jan 2017 21:31:31 +0100 Subject: [PATCH 1423/1442] USB: serial: qcserial: add Dell DW5570 QDL commit 24d615a694d649aa2e167c3f97f62bdad07e3f84 upstream. The Dell DW5570 is a re-branded Sierra Wireless MC8805 which will by default boot with vid 0x413c and pid 0x81a3. When triggered QDL download mode, the device switches to pid 0x81a6 and provides the standard TTY used for firmware upgrade. Signed-off-by: Aleksander Morgado Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 1bc6089b9008..696458db7e3c 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(0x1410, 0xa021)}, /* Novatel Gobi 3000 Composite */ {USB_DEVICE(0x413c, 0x8193)}, /* Dell Gobi 3000 QDL */ {USB_DEVICE(0x413c, 0x8194)}, /* Dell Gobi 3000 Composite */ + {USB_DEVICE(0x413c, 0x81a6)}, /* Dell DW5570 QDL (MC8805) */ {USB_DEVICE(0x1199, 0x68a4)}, /* Sierra Wireless QDL */ {USB_DEVICE(0x1199, 0x68a5)}, /* Sierra Wireless Modem */ {USB_DEVICE(0x1199, 0x68a8)}, /* Sierra Wireless QDL */ -- GitLab From 4807725aab0bf4ed66e0680f41c7e7666084cb8f Mon Sep 17 00:00:00 2001 From: "Marcel J.E. Mol" Date: Mon, 30 Jan 2017 19:26:40 +0100 Subject: [PATCH 1424/1442] USB: serial: pl2303: add ATEN device ID commit d07830db1bdb254e4b50d366010b219286b8c937 upstream. Seems that ATEN serial-to-usb devices using pl2303 exist with different device ids. This patch adds a missing device ID so it is recognised by the driver. Signed-off-by: Marcel J.E. Mol Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 46fca6b75846..1db4b61bdf7b 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -49,6 +49,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, + { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID_UCSGT) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index e3b7af8adfb7..09d9be88209e 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -27,6 +27,7 @@ #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 +#define ATEN_PRODUCT_ID2 0x2118 #define IODATA_VENDOR_ID 0x04bb #define IODATA_PRODUCT_ID 0x0a03 -- GitLab From cbd819e7db3a00630e9a0eecd5f3245e937bde98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Fri, 20 Jan 2017 19:46:34 +0100 Subject: [PATCH 1425/1442] USB: Add quirk for WORLDE easykey.25 MIDI keyboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d9b2997e4a0a874e452df7cdd7de5a54502bd0aa upstream. Add a quirk for WORLDE easykey.25 MIDI keyboard (idVendor=0218, idProduct=0401). The device reports that it has config string descriptor at index 3, but when the system selects the configuration and tries to get the description, it returns a -EPROTO error, the communication restarts and this keeps repeating over and over again. Not requesting the string descriptor makes the device work correctly. Relevant info from Wireshark: [...] CONFIGURATION DESCRIPTOR bLength: 9 bDescriptorType: 0x02 (CONFIGURATION) wTotalLength: 101 bNumInterfaces: 2 bConfigurationValue: 1 iConfiguration: 3 Configuration bmAttributes: 0xc0 SELF-POWERED NO REMOTE-WAKEUP 1... .... = Must be 1: Must be 1 for USB 1.1 and higher .1.. .... = Self-Powered: This device is SELF-POWERED ..0. .... = Remote Wakeup: This device does NOT support remote wakeup bMaxPower: 50 (100mA) [...] 45 0.369104 host 2.38.0 USB 64 GET DESCRIPTOR Request STRING [...] URB setup bmRequestType: 0x80 1... .... = Direction: Device-to-host .00. .... = Type: Standard (0x00) ...0 0000 = Recipient: Device (0x00) bRequest: GET DESCRIPTOR (6) Descriptor Index: 0x03 bDescriptorType: 0x03 Language Id: English (United States) (0x0409) wLength: 255 46 0.369255 2.38.0 host USB 64 GET DESCRIPTOR Response STRING[Malformed Packet] [...] Frame 46: 64 bytes on wire (512 bits), 64 bytes captured (512 bits) on interface 0 USB URB [Source: 2.38.0] [Destination: host] URB id: 0xffff88021f62d480 URB type: URB_COMPLETE ('C') URB transfer type: URB_CONTROL (0x02) Endpoint: 0x80, Direction: IN Device: 38 URB bus id: 2 Device setup request: not relevant ('-') Data: present (0) URB sec: 1484896277 URB usec: 455031 URB status: Protocol error (-EPROTO) (-71) URB length [bytes]: 0 Data length [bytes]: 0 [Request in: 45] [Time from request: 0.000151000 seconds] Unused Setup Header Interval: 0 Start frame: 0 Copy of Transfer Flags: 0x00000200 Number of ISO descriptors: 0 [Malformed Packet: USB] [Expert Info (Error/Malformed): Malformed Packet (Exception occurred)] [Malformed Packet (Exception occurred)] [Severity level: Error] [Group: Malformed] Signed-off-by: Lukáš Lalinský Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index d2e50a27140c..24f9f98968a5 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -37,6 +37,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* CBM - Flash disk */ { USB_DEVICE(0x0204, 0x6025), .driver_info = USB_QUIRK_RESET_RESUME }, + /* WORLDE easy key (easykey.25) MIDI controller */ + { USB_DEVICE(0x0218, 0x0401), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* HP 5300/5370C scanner */ { USB_DEVICE(0x03f0, 0x0701), .driver_info = USB_QUIRK_STRING_FETCH_255 }, -- GitLab From e40d15fce9293acc04db76546edc2689780961af Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 24 Jan 2017 09:18:57 -0600 Subject: [PATCH 1426/1442] usb: musb: Fix host mode error -71 regression commit 407788b51db6f6aab499d02420082f436abf3238 upstream. Commit 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") started implementing musb generic runtime PM support by introducing devctl register session bit based state control. This caused a regression where if a USB mass storage device is connected to a USB hub, we can get: usb 1-1: reset high-speed USB device number 2 using musb-hdrc usb 1-1: device descriptor read/64, error -71 usb 1-1.1: new high-speed USB device number 4 using musb-hdrc This is because before the USB storage device is connected, musb is in OTG_STATE_A_SUSPEND. And we currently only set need_finish_resume in musb_stage0_irq() and the related code calling finish_resume_work in musb_resume() and musb_runtime_resume() never gets called. To fix the issue, we can call schedule_delayed_work() directly in musb_stage0_irq() to have finish_resume_work run. And we should no longer never get interrupts when when suspended. We have changed musb to no longer need pm_runtime_irqsafe(). The need_finish_resume flag was added in commit 9298b4aad37e ("usb: musb: fix device hotplug behind hub") and no longer applies as far as I can tell. So let's just remove the earlier code that no longer is needed. Fixes: 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") Reported-by: Bin Liu Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 15 ++------------- drivers/usb/musb/musb_core.h | 1 - 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index c3e172e15ec3..338575fb2d27 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -578,11 +578,11 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, | MUSB_PORT_STAT_RESUME; musb->rh_timer = jiffies + msecs_to_jiffies(USB_RESUME_TIMEOUT); - musb->need_finish_resume = 1; - musb->xceiv->otg->state = OTG_STATE_A_HOST; musb->is_active = 1; musb_host_resume_root_hub(musb); + schedule_delayed_work(&musb->finish_resume_work, + msecs_to_jiffies(USB_RESUME_TIMEOUT)); break; case OTG_STATE_B_WAIT_ACON: musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL; @@ -2691,11 +2691,6 @@ static int musb_resume(struct device *dev) mask = MUSB_DEVCTL_BDEVICE | MUSB_DEVCTL_FSDEV | MUSB_DEVCTL_LSDEV; if ((devctl & mask) != (musb->context.devctl & mask)) musb->port1_status = 0; - if (musb->need_finish_resume) { - musb->need_finish_resume = 0; - schedule_delayed_work(&musb->finish_resume_work, - msecs_to_jiffies(USB_RESUME_TIMEOUT)); - } /* * The USB HUB code expects the device to be in RPM_ACTIVE once it came @@ -2747,12 +2742,6 @@ static int musb_runtime_resume(struct device *dev) musb_restore_context(musb); - if (musb->need_finish_resume) { - musb->need_finish_resume = 0; - schedule_delayed_work(&musb->finish_resume_work, - msecs_to_jiffies(USB_RESUME_TIMEOUT)); - } - spin_lock_irqsave(&musb->lock, flags); error = musb_run_resume_work(musb); if (error) diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 47331dbdde29..854fbf7b6b23 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -410,7 +410,6 @@ struct musb { /* is_suspended means USB B_PERIPHERAL suspend */ unsigned is_suspended:1; - unsigned need_finish_resume :1; /* may_wakeup means remote wakeup is enabled */ unsigned may_wakeup:1; -- GitLab From 12a9c11c2e198ae879a321f8c3b5a2df73f8424f Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Wed, 18 Jan 2017 00:57:44 +0000 Subject: [PATCH 1427/1442] usb: gadget: f_fs: Assorted buffer overflow checks. commit 83e526f2a2fa4b2e82b6bd3ddbb26b70acfa8947 upstream. OS descriptor head, when flagged as provided, is accessed without checking if it fits in provided buffer. Verify length before access. Also, there are other places where buffer length it checked after accessing offsets which are potentially past the end. Check buffer length before as well to fail cleanly. Signed-off-by: Vincent Pelletier Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 17989b72cdae..8d412d8b1f29 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2269,6 +2269,8 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || h->interface >= ffs->interfaces_count) return -EINVAL; length = le32_to_cpu(d->dwSize); + if (len < length) + return -EINVAL; type = le32_to_cpu(d->dwPropertyDataType); if (type < USB_EXT_PROP_UNICODE || type > USB_EXT_PROP_UNICODE_MULTI) { @@ -2277,6 +2279,11 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, return -EINVAL; } pnl = le16_to_cpu(d->wPropertyNameLength); + if (length < 14 + pnl) { + pr_vdebug("invalid os descriptor length: %d pnl:%d (descriptor %d)\n", + length, pnl, type); + return -EINVAL; + } pdl = le32_to_cpu(*(u32 *)((u8 *)data + 10 + pnl)); if (length != 14 + pnl + pdl) { pr_vdebug("invalid os descriptor length: %d pnl:%d pdl:%d (descriptor %d)\n", @@ -2363,6 +2370,9 @@ static int __ffs_data_got_descs(struct ffs_data *ffs, } } if (flags & (1 << i)) { + if (len < 4) { + goto error; + } os_descs_count = get_unaligned_le32(data); data += 4; len -= 4; @@ -2435,7 +2445,8 @@ static int __ffs_data_got_strings(struct ffs_data *ffs, ENTER(); - if (unlikely(get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC || + if (unlikely(len < 16 || + get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC || get_unaligned_le32(data + 4) != len)) goto error; str_count = get_unaligned_le32(data + 8); -- GitLab From 89cc65c1117f8c5a44439706eb2dce51b0491266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 24 Jan 2017 10:31:18 +0100 Subject: [PATCH 1428/1442] USB: serial: option: add device ID for HP lt2523 (Novatel E371) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5d03a2fd2292e71936c4235885c35ccc3c94695b upstream. Yet another laptop vendor rebranded Novatel E371. Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7ce31a4c7e7f..42cc72e54c05 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2007,6 +2007,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- GitLab From db0e02ef6b821a42f7270ceaa61e800399d0950c Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Mon, 23 Jan 2017 16:32:57 +0000 Subject: [PATCH 1429/1442] staging: greybus: timesync: validate platform state callback commit b17c1bba9cec1727451b906d9a0c209774624873 upstream. When tearingdown timesync, and not in arche platform, the state platform callback is not initialized. That will trigger the following NULL dereferencing. CallTrace: ? gb_timesync_platform_unlock_bus+0x11/0x20 [greybus] gb_timesync_teardown+0x85/0xc0 [greybus] gb_timesync_svc_remove+0xab/0x190 [greybus] gb_svc_del+0x29/0x110 [greybus] gb_hd_del+0x14/0x20 [greybus] ap_disconnect+0x24/0x60 [gb_es2] usb_unbind_interface+0x7a/0x2c0 __device_release_driver+0x96/0x150 device_release_driver+0x1e/0x30 bus_remove_device+0xe7/0x130 device_del+0x116/0x230 usb_disable_device+0x97/0x1f0 usb_disconnect+0x80/0x260 hub_event+0x5ca/0x10e0 process_one_work+0x126/0x3b0 worker_thread+0x55/0x4c0 ? process_one_work+0x3b0/0x3b0 kthread+0xc4/0xe0 ? kthread_park+0xb0/0xb0 ret_from_fork+0x22/0x30 So, fix that by adding checks before use the callback. Fixes: 970dc85bd95d ("greybus: timesync: Add timesync core driver") Signed-off-by: Rui Miguel Silva Reviewed-by: Viresh Kumar Reviewed-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/timesync_platform.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/staging/greybus/timesync_platform.c b/drivers/staging/greybus/timesync_platform.c index 113f3d6c4b3a..27f75b17679b 100644 --- a/drivers/staging/greybus/timesync_platform.c +++ b/drivers/staging/greybus/timesync_platform.c @@ -45,12 +45,18 @@ u32 gb_timesync_platform_get_clock_rate(void) int gb_timesync_platform_lock_bus(struct gb_timesync_svc *pdata) { + if (!arche_platform_change_state_cb) + return 0; + return arche_platform_change_state_cb(ARCHE_PLATFORM_STATE_TIME_SYNC, pdata); } void gb_timesync_platform_unlock_bus(void) { + if (!arche_platform_change_state_cb) + return; + arche_platform_change_state_cb(ARCHE_PLATFORM_STATE_ACTIVE, NULL); } -- GitLab From 54d2ccc4003b7ec3378349df62b1642a2a4aa51c Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 16 Jan 2017 11:27:52 -0800 Subject: [PATCH 1430/1442] iio: adc: palmas_gpadc: retrieve a valid iio_dev in suspend/resume commit d1aaf20ee655888c227d5137b7a63551f8d15416 upstream. The suspend/resume functions were using dev_to_iio_dev() to get the iio_dev. That only works on IIO dev's. Use dev_get_drvdata() for a platform device to get the correct iio_dev. Signed-off-by: Alison Schofield Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/palmas_gpadc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c index 2bbf0c521beb..7d61b566e148 100644 --- a/drivers/iio/adc/palmas_gpadc.c +++ b/drivers/iio/adc/palmas_gpadc.c @@ -775,7 +775,7 @@ static int palmas_adc_wakeup_reset(struct palmas_gpadc *adc) static int palmas_gpadc_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); struct palmas_gpadc *adc = iio_priv(indio_dev); int wakeup = adc->wakeup1_enable || adc->wakeup2_enable; int ret; @@ -798,7 +798,7 @@ static int palmas_gpadc_suspend(struct device *dev) static int palmas_gpadc_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = dev_get_drvdata(dev); struct palmas_gpadc *adc = iio_priv(indio_dev); int wakeup = adc->wakeup1_enable || adc->wakeup2_enable; int ret; -- GitLab From 5f0ee562605b4290959675830273d5485616d0cd Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sat, 14 Jan 2017 19:52:50 -0800 Subject: [PATCH 1431/1442] iio: health: afe4404: retrieve a valid iio_dev in suspend/resume commit 802ecfc113df1e15af1d028427cbbe785ae9cc4a upstream. The suspend/resume functions were using dev_to_iio_dev() to get the iio_dev. That only works on IIO dev's. Replace it with i2c functions to get the correct iio_dev. Signed-off-by: Alison Schofield Acked-by: Andrew F. Davis Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/health/afe4404.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 45266404f7e3..964f5231a831 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -428,7 +428,7 @@ MODULE_DEVICE_TABLE(of, afe4404_of_match); static int __maybe_unused afe4404_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); struct afe4404_data *afe = iio_priv(indio_dev); int ret; @@ -449,7 +449,7 @@ static int __maybe_unused afe4404_suspend(struct device *dev) static int __maybe_unused afe4404_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); struct afe4404_data *afe = iio_priv(indio_dev); int ret; -- GitLab From 7baa8491d47d5ad70a22481fbea1913e4b1a746a Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sat, 14 Jan 2017 19:51:52 -0800 Subject: [PATCH 1432/1442] iio: health: afe4403: retrieve a valid iio_dev in suspend/resume commit a5badd1e97e6caeca78ad74191f12fc923c403a8 upstream. The suspend/resume functions were using dev_to_iio_dev() to get the iio_dev. That only works on IIO dev's. Replace it with spi functions to get the correct iio_dev. Signed-off-by: Alison Schofield Acked-by: Andrew F. Davis Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/health/afe4403.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 9a081465c42f..6bb23a49e81e 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -422,7 +422,7 @@ MODULE_DEVICE_TABLE(of, afe4403_of_match); static int __maybe_unused afe4403_suspend(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = spi_get_drvdata(to_spi_device(dev)); struct afe4403_data *afe = iio_priv(indio_dev); int ret; @@ -443,7 +443,7 @@ static int __maybe_unused afe4403_suspend(struct device *dev) static int __maybe_unused afe4403_resume(struct device *dev) { - struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev *indio_dev = spi_get_drvdata(to_spi_device(dev)); struct afe4403_data *afe = iio_priv(indio_dev); int ret; -- GitLab From 12c2fcf6bcf7da2d861ab0fe36550e39dd783ff9 Mon Sep 17 00:00:00 2001 From: John Brooks Date: Wed, 18 Jan 2017 21:50:39 +0000 Subject: [PATCH 1433/1442] iio: dht11: Use usleep_range instead of msleep for start signal commit 5c113b5e0082e90d2e1c7b12e96a7b8cf0623e27 upstream. The DHT22 (AM2302) datasheet specifies that the LOW start pulse should not exceed 20ms. However, observations with an oscilloscope of an RPi Model 2B (rev 1.1) communicating with a DHT22 sensor showed that the driver was consistently sending start pulses longer than 20ms: Kernel 4.7.10-v7+ (n=132): Minimum pulse length: 20.20ms Maximum: 29.84ms Mean: 24.96ms StDev: 2.82ms Sensor response rate: 100% Read success rate: 76% On kernel 4.8, the start pulse was so long that the sensor would not even respond 97% of the time: Kernel 4.8.16-v7+ (n=100): Minimum pulse length: 30.4ms Maximum: 74.4ms Mean: 39.3ms StDev: 10.2ms Sensor response rate: 3% Read success rate: 3% The driver would return ETIMEDOUT and write log messages like this: [ 51.430987] dht11 dht11@0: Only 1 signal edges detected [ 66.311019] dht11 dht11@0: Only 0 signal edges detected Replacing msleep(18) with usleep_range(18000, 20000) made the pulse length sane again and restored responsiveness: Kernel 4.8.16-v7+ with usleep_range (n=123): Minimum pulse length: 18.16ms Maximum: 20.20ms Mean: 19.85ms StDev: 0.51ms Sensor response rate: 100% Read success rate: 84% Signed-off-by: John Brooks Reviewed-by: Harald Geyer Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/humidity/dht11.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c index 9c47bc98f3ac..2a22ad920333 100644 --- a/drivers/iio/humidity/dht11.c +++ b/drivers/iio/humidity/dht11.c @@ -71,7 +71,8 @@ * a) select an implementation using busy loop polling on those systems * b) use the checksum to do some probabilistic decoding */ -#define DHT11_START_TRANSMISSION 18 /* ms */ +#define DHT11_START_TRANSMISSION_MIN 18000 /* us */ +#define DHT11_START_TRANSMISSION_MAX 20000 /* us */ #define DHT11_MIN_TIMERES 34000 /* ns */ #define DHT11_THRESHOLD 49000 /* ns */ #define DHT11_AMBIG_LOW 23000 /* ns */ @@ -228,7 +229,8 @@ static int dht11_read_raw(struct iio_dev *iio_dev, ret = gpio_direction_output(dht11->gpio, 0); if (ret) goto err; - msleep(DHT11_START_TRANSMISSION); + usleep_range(DHT11_START_TRANSMISSION_MIN, + DHT11_START_TRANSMISSION_MAX); ret = gpio_direction_input(dht11->gpio); if (ret) goto err; -- GitLab From c6b0333baea0acbe792c22ac71a04e40e26a8398 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Mon, 16 Jan 2017 18:04:18 -0800 Subject: [PATCH 1434/1442] iio: health: max30100: fixed parenthesis around FIFO count check commit 828f84ee8f84710ea1818b3565add268bcb824c8 upstream. FIFO was being read every sample after the "almost full" state was reached. This was due to an incorrect placement of the parenthesis in the while condition check. Note - the fixes tag is not actually correct, but the fix in this patch would also be needed for it to function correctly so we'll go with that one. Backports should pick up both. Signed-off-by: Matt Ranostay Fixes: b74fccad7 ("iio: health: max30100: correct FIFO check condition") Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/health/max30100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c index 90ab8a2d2846..183c14329d6e 100644 --- a/drivers/iio/health/max30100.c +++ b/drivers/iio/health/max30100.c @@ -238,7 +238,7 @@ static irqreturn_t max30100_interrupt_handler(int irq, void *private) mutex_lock(&data->lock); - while (cnt || (cnt = max30100_fifo_count(data) > 0)) { + while (cnt || (cnt = max30100_fifo_count(data)) > 0) { ret = max30100_read_measurement(data); if (ret) break; -- GitLab From e02136282296dbc90f3c88b1cc5202ec0d5ed9f1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 17 Jan 2017 16:00:48 +0000 Subject: [PATCH 1435/1442] irqdomain: Avoid activating interrupts more than once commit 08d85f3ea99f1eeafc4e8507936190e86a16ee8c upstream. Since commit f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early"), we can end-up activating a PCI/MSI twice (once at allocation time, and once at startup time). This is normally of no consequences, except that there is some HW out there that may misbehave if activate is used more than once (the GICv3 ITS, for example, uses the activate callback to issue the MAPVI command, and the architecture spec says that "If there is an existing mapping for the EventID-DeviceID combination, behavior is UNPREDICTABLE"). While this could be worked around in each individual driver, it may make more sense to tackle the issue at the core level. In order to avoid getting in that situation, let's have a per-interrupt flag to remember if we have already activated that interrupt or not. Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early") Reported-and-tested-by: Andre Przywara Signed-off-by: Marc Zyngier Link: http://lkml.kernel.org/r/1484668848-24361-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/irq.h | 17 ++++++++++++++++ kernel/irq/irqdomain.c | 44 ++++++++++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index e79875574b39..39e3254e5769 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -184,6 +184,7 @@ struct irq_data { * * IRQD_TRIGGER_MASK - Mask for the trigger type bits * IRQD_SETAFFINITY_PENDING - Affinity setting is pending + * IRQD_ACTIVATED - Interrupt has already been activated * IRQD_NO_BALANCING - Balancing disabled for this IRQ * IRQD_PER_CPU - Interrupt is per cpu * IRQD_AFFINITY_SET - Interrupt affinity was set @@ -202,6 +203,7 @@ struct irq_data { enum { IRQD_TRIGGER_MASK = 0xf, IRQD_SETAFFINITY_PENDING = (1 << 8), + IRQD_ACTIVATED = (1 << 9), IRQD_NO_BALANCING = (1 << 10), IRQD_PER_CPU = (1 << 11), IRQD_AFFINITY_SET = (1 << 12), @@ -312,6 +314,21 @@ static inline bool irqd_affinity_is_managed(struct irq_data *d) return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED; } +static inline bool irqd_is_activated(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_ACTIVATED; +} + +static inline void irqd_set_activated(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_ACTIVATED; +} + +static inline void irqd_clr_activated(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_ACTIVATED; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8c0a0ae43521..b59e6768c5e9 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1346,6 +1346,30 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); +static void __irq_domain_activate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (irq_data->parent_data) + __irq_domain_activate_irq(irq_data->parent_data); + if (domain->ops->activate) + domain->ops->activate(domain, irq_data); + } +} + +static void __irq_domain_deactivate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (domain->ops->deactivate) + domain->ops->deactivate(domain, irq_data); + if (irq_data->parent_data) + __irq_domain_deactivate_irq(irq_data->parent_data); + } +} + /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt @@ -1356,13 +1380,9 @@ EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); */ void irq_domain_activate_irq(struct irq_data *irq_data) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; - - if (irq_data->parent_data) - irq_domain_activate_irq(irq_data->parent_data); - if (domain->ops->activate) - domain->ops->activate(domain, irq_data); + if (!irqd_is_activated(irq_data)) { + __irq_domain_activate_irq(irq_data); + irqd_set_activated(irq_data); } } @@ -1376,13 +1396,9 @@ void irq_domain_activate_irq(struct irq_data *irq_data) */ void irq_domain_deactivate_irq(struct irq_data *irq_data) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; - - if (domain->ops->deactivate) - domain->ops->deactivate(domain, irq_data); - if (irq_data->parent_data) - irq_domain_deactivate_irq(irq_data->parent_data); + if (irqd_is_activated(irq_data)) { + __irq_domain_deactivate_irq(irq_data); + irqd_clr_activated(irq_data); } } -- GitLab From 13363b6988f60be9f75146a52476a5e8d55f503c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 19:03:21 +0100 Subject: [PATCH 1436/1442] x86/irq: Make irq activate operations symmetric commit aaaec6fc755447a1d056765b11b24d8ff2b81366 upstream. The recent commit which prevents double activation of interrupts unearthed interesting code in x86. The code (ab)uses irq_domain_activate_irq() to reconfigure an already activated interrupt. That trips over the prevention code now. Fix it by deactivating the interrupt before activating the new configuration. Fixes: 08d85f3ea99f1 "irqdomain: Avoid activating interrupts more than once" Reported-and-tested-by: Mike Galbraith Reported-and-tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andrey Ryabinin Cc: Marc Zyngier Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701311901580.3457@nanos Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/io_apic.c | 2 ++ arch/x86/kernel/hpet.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3d8ff40ecc6f..7249f1500bcb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2118,6 +2118,7 @@ static inline void __init check_timer(void) if (idx != -1 && irq_trigger(idx)) unmask_ioapic_irq(irq_get_chip_data(0)); } + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) @@ -2139,6 +2140,7 @@ static inline void __init check_timer(void) * legacy devices should be connected to IO APIC #0 */ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); legacy_pic->unmask(0); if (timer_irq_works()) { diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 274fab99169d..932348fbb6ea 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -352,6 +352,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) } else { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); disable_irq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); -- GitLab From bf9ab22c350d17af861681348273df5420ac182d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 15 Dec 2016 08:09:35 -0800 Subject: [PATCH 1437/1442] iw_cxgb4: set correct FetchBurstMax for QPs commit b414fa01c31318383ae29d9d23cb9ca4184bbd86 upstream. The current QP FetchBurstMax value is 256B, which is incorrect since a WR can exceed that value. The result being a partial WR fetched by hardware, and a fatal "bad WR" error posted by the SGE. So bump the FetchBurstMax to 512B. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb4/qp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index bb0fde6e2047..cc2243f6cc7f 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -321,7 +321,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_DCAEN_V(0) | FW_RI_RES_WR_DCACPU_V(0) | FW_RI_RES_WR_FBMIN_V(2) | - FW_RI_RES_WR_FBMAX_V(2) | + (t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_FBMAX_V(2) : + FW_RI_RES_WR_FBMAX_V(3)) | FW_RI_RES_WR_CIDXFTHRESHO_V(0) | FW_RI_RES_WR_CIDXFTHRESH_V(0) | FW_RI_RES_WR_EQSIZE_V(eqsize)); @@ -345,7 +346,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_DCAEN_V(0) | FW_RI_RES_WR_DCACPU_V(0) | FW_RI_RES_WR_FBMIN_V(2) | - FW_RI_RES_WR_FBMAX_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | FW_RI_RES_WR_CIDXFTHRESHO_V(0) | FW_RI_RES_WR_CIDXFTHRESH_V(0) | FW_RI_RES_WR_EQSIZE_V(eqsize)); -- GitLab From 72cd604cfd864b06c5a10d2bb139b19825e0fcbc Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 3 Feb 2017 13:13:26 -0800 Subject: [PATCH 1438/1442] fs: break out of iomap_file_buffered_write on fatal signals commit d1908f52557b3230fbd63c0429f3b4b748bf2b6d upstream. Tetsuo has noticed that an OOM stress test which performs large write requests can cause the full memory reserves depletion. He has tracked this down to the following path __alloc_pages_nodemask+0x436/0x4d0 alloc_pages_current+0x97/0x1b0 __page_cache_alloc+0x15d/0x1a0 mm/filemap.c:728 pagecache_get_page+0x5a/0x2b0 mm/filemap.c:1331 grab_cache_page_write_begin+0x23/0x40 mm/filemap.c:2773 iomap_write_begin+0x50/0xd0 fs/iomap.c:118 iomap_write_actor+0xb5/0x1a0 fs/iomap.c:190 ? iomap_write_end+0x80/0x80 fs/iomap.c:150 iomap_apply+0xb3/0x130 fs/iomap.c:79 iomap_file_buffered_write+0x68/0xa0 fs/iomap.c:243 ? iomap_write_end+0x80/0x80 xfs_file_buffered_aio_write+0x132/0x390 [xfs] ? remove_wait_queue+0x59/0x60 xfs_file_write_iter+0x90/0x130 [xfs] __vfs_write+0xe5/0x140 vfs_write+0xc7/0x1f0 ? syscall_trace_enter+0x1d0/0x380 SyS_write+0x58/0xc0 do_syscall_64+0x6c/0x200 entry_SYSCALL64_slow_path+0x25/0x25 the oom victim has access to all memory reserves to make a forward progress to exit easier. But iomap_file_buffered_write and other callers of iomap_apply loop to complete the full request. We need to check for fatal signals and back off with a short write instead. As the iomap_apply delegates all the work down to the actor we have to hook into those. All callers that work with the page cache are calling iomap_write_begin so we will check for signals there. dax_iomap_actor has to handle the situation explicitly because it copies data to the userspace directly. Other callers like iomap_page_mkwrite work on a single page or iomap_fiemap_actor do not allocate memory based on the given len. Fixes: 68a9f5e7007c ("xfs: implement iomap based buffered write path") Link: http://lkml.kernel.org/r/20170201092706.9966-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Reviewed-by: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/dax.c | 5 +++++ fs/iomap.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 014defd2e744..bf6218da7928 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1270,6 +1270,11 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct blk_dax_ctl dax = { 0 }; ssize_t map_len; + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + dax.sector = iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK; diff --git a/fs/iomap.c b/fs/iomap.c index a8ee8c33ca78..814ae8f9587d 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -113,6 +113,9 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, BUG_ON(pos + len > iomap->offset + iomap->length); + if (fatal_signal_pending(current)) + return -EINTR; + page = grab_cache_page_write_begin(inode->i_mapping, index, flags); if (!page) return -ENOMEM; -- GitLab From f2a0409a08502d64fbe3990354dff5902b08d2fb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Sep 2016 14:51:08 +0100 Subject: [PATCH 1439/1442] drm/i915/execlists: Reset RING registers upon resume commit bafb2f7d4755bf1571bd5e9a03b97f3fc4fe69ae upstream. There is a disparity in the context image saved to disk and our own bookkeeping - that is we presume the RING_HEAD and RING_TAIL match our stored ce->ring->tail value. However, as we emit WA_TAIL_DWORDS into the ring but may not tell the GPU about them, the GPU may be lagging behind our bookkeeping. Upon hibernation we do not save stolen pages, presuming that their contents are volatile. This means that although we start writing into the ring at tail, the GPU starts executing from its HEAD and there may be some garbage in between and so the GPU promptly hangs upon resume. Testcase: igt/gem_exec_suspend/basic-S4 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96526 Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20160921135108.29574-3-chris@chris-wilson.co.uk Cc: Eric Blau Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 50 ++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 67db1577ee49..4147e51cf893 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2152,30 +2152,42 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, void intel_lr_context_resume(struct drm_i915_private *dev_priv) { - struct i915_gem_context *ctx = dev_priv->kernel_context; struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + + /* Because we emit WA_TAIL_DWORDS there may be a disparity + * between our bookkeeping in ce->ring->head and ce->ring->tail and + * that stored in context. As we only write new commands from + * ce->ring->tail onwards, everything before that is junk. If the GPU + * starts reading from its RING_HEAD from the context, it may try to + * execute that junk and die. + * + * So to avoid that we reset the context images upon resume. For + * simplicity, we just zero everything out. + */ + list_for_each_entry(ctx, &dev_priv->context_list, link) { + for_each_engine(engine, dev_priv) { + struct intel_context *ce = &ctx->engine[engine->id]; + u32 *reg; - for_each_engine(engine, dev_priv) { - struct intel_context *ce = &ctx->engine[engine->id]; - void *vaddr; - uint32_t *reg_state; - - if (!ce->state) - continue; - - vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); - if (WARN_ON(IS_ERR(vaddr))) - continue; + if (!ce->state) + continue; - reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; + reg = i915_gem_object_pin_map(ce->state->obj, + I915_MAP_WB); + if (WARN_ON(IS_ERR(reg))) + continue; - reg_state[CTX_RING_HEAD+1] = 0; - reg_state[CTX_RING_TAIL+1] = 0; + reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg); + reg[CTX_RING_HEAD+1] = 0; + reg[CTX_RING_TAIL+1] = 0; - ce->state->obj->dirty = true; - i915_gem_object_unpin_map(ce->state->obj); + ce->state->obj->dirty = true; + i915_gem_object_unpin_map(ce->state->obj); - ce->ring->head = 0; - ce->ring->tail = 0; + ce->ring->head = ce->ring->tail = 0; + ce->ring->last_retired_head = -1; + intel_ring_update_space(ce->ring); + } } } -- GitLab From d2e4b66b4ef24aeecb0cdef7278aa00850461de9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Feb 2017 08:08:40 +0100 Subject: [PATCH 1440/1442] Linux 4.9.9 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1130803ab93c..c0c41c9fac0c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 8 +SUBLEVEL = 9 EXTRAVERSION = NAME = Roaring Lionus -- GitLab From d8a4d0354d3c5ec73896db40b6aa387aa84db119 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Wed, 8 Feb 2017 16:37:17 -0800 Subject: [PATCH 1441/1442] ANDROID: sched/walt: use div_u64 instead of do_div Change-Id: I1ff13f1e409051167e2ff5bd18a1299c78a46306 Signed-off-by: Dmitry Shmidt --- include/trace/events/sched.h | 3 ++- kernel/sched/sched.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index d9169f9d783a..12927f326d61 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -1033,7 +1033,8 @@ TRACE_EVENT(walt_update_history, __entry->evt = evt; __entry->demand = p->ravg.demand; __entry->walt_avg = (__entry->demand << 10); - do_div(__entry->walt_avg, walt_ravg_window); + __entry->walt_avg = div_u64(__entry->walt_avg, + walt_ravg_window); __entry->pelt_avg = p->se.avg.util_avg; memcpy(__entry->hist, p->ravg.sum_history, RAVG_HIST_SIZE_MAX * sizeof(u32)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e9ed87ea20eb..b6839c2fdb50 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1642,7 +1642,7 @@ static inline unsigned long __cpu_util(int cpu, int delta) #ifdef CONFIG_SCHED_WALT if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { util = cpu_rq(cpu)->prev_runnable_sum << SCHED_CAPACITY_SHIFT; - do_div(util, walt_ravg_window); + util = div_u64(util, walt_ravg_window); } #endif delta += util; -- GitLab From e42549628dfc3bda2c512e0e7c13d0cae5000e8d Mon Sep 17 00:00:00 2001 From: Lingfeng Yang Date: Thu, 9 Feb 2017 07:43:47 -0800 Subject: [PATCH 1442/1442] ANDROID: goldfish_sync: 32 max cmds to save stack We never use the 64 anyway. Should fix compile warning. Change-Id: I384155cc24e0a259d7d3898f310fc33154991311 Signed-off-by: Lingfeng Yang --- drivers/staging/goldfish/goldfish_sync_timeline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/goldfish/goldfish_sync_timeline.c b/drivers/staging/goldfish/goldfish_sync_timeline.c index 0dac94f5accd..5bef4c6c0283 100644 --- a/drivers/staging/goldfish/goldfish_sync_timeline.c +++ b/drivers/staging/goldfish/goldfish_sync_timeline.c @@ -127,7 +127,7 @@ struct goldfish_sync_guestcmd { uint64_t guest_timeline_handle; }; -#define GOLDFISH_SYNC_MAX_CMDS 64 +#define GOLDFISH_SYNC_MAX_CMDS 32 struct goldfish_sync_state { char __iomem *reg_base; -- GitLab